Web_Scraping Project
MagicBricks_website Data_Collection
# Import Libraries
import requests
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
import re
# Code for the collection of the data
# here list of 4 cities are taken and collection of data is done for them
cities = ['bangalore','mumbai','chennai','visakhapatnam']
price=[]
per_sqt=[]
BHK=[]
Carpet_Area=[]
Owner=[]
Floor=[]
Area=[]
City=[]
for j in cities:
url = "https://www.magicbricks.com/flats-in-"+j+"-for-sale-pppfs"
print(url)
page = requests.get(url)
soup = BeautifulSoup(page.text)
for i in soup.find_all("div",class_="mb-srp__card__price--amount"):
price.append(i.text)
for i in soup.find_all("div",class_="mb-srp__card__price--size"):
per_sqt.append(i.text.split(' ')[0])
for i in soup.find_all("h2",class_="mb-srp__card--title"):
BHK.append(i.text[0])
for i in soup.find_all("div",class_="mb-srp__card__summary--value"):
a =i.text
b = re.findall(r"(\d+)\ssqft",str(a))
if b:
Carpet_Area.append(b[0])
for i in soup.find_all('div',class_='mb-srp__card__ads--name'):
Owner.append(i.text.split(':')[1])
for i in soup.find_all("div",class_="mb-srp__card__summary__list--item"):
a = i.text
b = re.findall(r"\w+\s\w+\s\w+\s\d+",a)
if b:
Floor.append(b[0])
for i in soup.find_all("h2",class_="mb-srp__card--title"):
Area.append(i.text.split('in')[1].split(',')[0])
City.append(j)
print(i,'---->', len(Owner))
print(i,'---->', len(Floor))
print(i,'---->', len(per_sqt))
https://www.magicbricks.com/flats-in-bangalore-for-sale-pppfs <h2 class="mb-srp__card--title">2 BHK Flat for Sale in Yelahanka , Bangalore</h2> ----> 30 <h2 class="mb-srp__card--title">2 BHK Flat for Sale in Yelahanka , Bangalore</h2> ----> 30 <h2 class="mb-srp__card--title">2 BHK Flat for Sale in Yelahanka , Bangalore</h2> ----> 29 https://www.magicbricks.com/flats-in-mumbai-for-sale-pppfs <h2 class="mb-srp__card--title">2 BHK Flat for Sale in Mulund West , Mumbai</h2> ----> 60 <h2 class="mb-srp__card--title">2 BHK Flat for Sale in Mulund West , Mumbai</h2> ----> 58 <h2 class="mb-srp__card--title">2 BHK Flat for Sale in Mulund West , Mumbai</h2> ----> 52 https://www.magicbricks.com/flats-in-chennai-for-sale-pppfs <h2 class="mb-srp__card--title">2 BHK Flat for Sale in Chennai</h2> ----> 90 <h2 class="mb-srp__card--title">2 BHK Flat for Sale in Chennai</h2> ----> 88 <h2 class="mb-srp__card--title">2 BHK Flat for Sale in Chennai</h2> ----> 81 https://www.magicbricks.com/flats-in-visakhapatnam-for-sale-pppfs <h2 class="mb-srp__card--title">2 BHK Flat for Sale in MVP Colony , Visakhapatnam</h2> ----> 120 <h2 class="mb-srp__card--title">2 BHK Flat for Sale in MVP Colony , Visakhapatnam</h2> ----> 118 <h2 class="mb-srp__card--title">2 BHK Flat for Sale in MVP Colony , Visakhapatnam</h2> ----> 110
# printing to see the values are obtained are not
print(price)
print(per_sqt)
print(BHK)
print(Carpet_Area)
print(Owner)
print(Floor)
print(Area)
print(City)
# Finding the length to see is all the lengths are same
print(len(price))
print(len(per_sqt))
print(len(BHK))
print(len(Carpet_Area))
print(len(Owner))
print(len(Floor))
print(len(Area))
print(len(City))
['₹60 Lac ', '₹80 Lac ', '₹48 Lac ', '₹1.35 Cr ', '₹61 Lac ', '₹1.30 Cr ', '₹50 Lac ', '₹1.80 Cr ', '₹1 Cr ', '₹1.02 Cr ', '₹1.45 Cr ', '₹3.85 Cr ', '₹70 Lac ', '₹85 Lac ', '₹98 Lac ', '₹60 Lac ', '₹78 Lac ', '₹57 Lac ', '₹1.08 Cr ', '₹57 Lac ', '₹35 Lac ', '₹1.80 Cr ', '₹2.75 Cr ', '₹1.12 Cr ', '₹1 Cr ', '₹1.45 Cr ', '₹80 Lac ', '₹1.35 Cr ', '₹2.10 Cr ', '₹51.2 Lac ', '₹73 Lac ', '₹22 Lac ', '₹2.25 Cr ', '₹2.70 Cr ', '₹41 Lac ', '₹5.99 Cr ', '₹1.20 Cr ', '₹1.30 Cr ', '₹65 Lac ', '₹3.75 Cr ', '₹2.70 Cr ', '₹1.02 Cr ', '₹95 Lac ', '₹60 Lac ', '₹15 Cr ', '₹32 Lac ', '₹98 Lac ', '₹4.89 Cr ', '₹44.9 Lac ', '₹70 Lac ', '₹1.05 Cr ', '₹30 Lac ', '₹1.25 Cr ', '₹1.90 Cr ', '₹1.55 Cr ', '₹1.28 Cr ', '₹2 Cr ', '₹2.52 Cr ', '₹1 Cr ', '₹1.60 Cr ', '₹55 Lac ', '₹80 Lac ', '₹60 Lac ', '₹60 Lac ', '₹85 Lac ', '₹49 Lac ', '₹55 Lac ', '₹53 Lac ', '₹69 Lac ', '₹1.25 Cr ', '₹87 Lac ', '₹32 Lac ', '₹71 Lac ', '₹85 Lac ', '₹47 Lac ', '₹46 Lac ', '₹43 Lac ', '₹1.15 Cr ', '₹27 Lac ', '₹29.7 Lac ', '₹59 Lac ', '₹45 Lac ', '₹35 Lac ', '₹74 Lac ', '₹39 Lac ', '₹46 Lac ', '₹1.30 Cr ', '₹40 Lac ', '₹70 Lac ', '₹74.9 Lac ', '₹47.5 Lac ', '₹72 Lac ', '₹97 Lac ', '₹63 Lac ', '₹35 Lac ', '₹63.6 Lac ', '₹47 Lac ', '₹1 Cr ', '₹38 Lac ', '₹30 Lac ', '₹97.5 Lac ', '₹30 Lac ', '₹30 Lac ', '₹55 Lac ', '₹60 Lac ', '₹73 Lac ', '₹45 Lac ', '₹89 Lac ', '₹70 Lac ', '₹49 Lac ', '₹43.6 Lac ', '₹56 Lac ', '₹97 Lac ', '₹47 Lac ', '₹78 Lac ', '₹38.9 Lac ', '₹26 Lac ', '₹42 Lac ', '₹38 Lac ', '₹78 Lac '] ['₹4950', '₹6957', '₹4528', '₹7542', '₹5706', '₹7303', '₹5000', '₹11335', '₹8889', '₹6997', '₹8146', '₹20632', '₹5655', '₹6071', '₹9800', '₹6367', '₹5158', '₹6374', '₹4553', '₹4545', '₹7500', '₹12500', '₹7705', '₹5305', '₹7436', '₹5333', '₹7418', '₹10194', '₹4650', '₹26471', '₹23276', '₹16667', nan, '₹23636', '₹37500', '₹27136', '₹18545', '₹60000', '₹5047', nan, '₹11529', '₹38459', '₹7242', nan, '₹10000', '₹14583', '₹5376', '₹12042', '₹13669', '₹19375', '₹21333', '₹33333', '₹22951', '₹10695', '₹25316', '₹5268', '₹8658', '₹5000', '₹4444', '₹8947', nan, '₹4895', '₹6111', '₹5268', '₹5712', '₹8367', '₹8406', nan, '₹5086', '₹9140', '₹6112', '₹7931', '₹4335', '₹11616', '₹4060', '₹3999', '₹7584', '₹5294', '₹6481', '₹6198', '₹4343', '₹6571', '₹11818', '₹5249', '₹6763', '₹8000', '₹3242', '₹5143', '₹4615', '₹4000', '₹4667', '₹4130', '₹4087', '₹7634', '₹3619', '₹3046', '₹7500', '₹5000', '₹3000', '₹3667', '₹3750', '₹4056', '₹3309', '₹5235', '₹4118', '₹3250', '₹6054', '₹5173', '₹4215', '₹4756', '₹3700', '₹2600', '₹4667', '₹4222', '₹7091', nan, nan, nan, nan, nan] ['3', '2', '2', '3', '2', '3', '2', '3', '3', '2', '3', '3', '3', '2', '2', '2', '2', '2', '3', '3', '2', '4', '3', '3', '3', '3', '3', '3', '3', '2', '1', '1', '2', '3', '1', '3', '2', '1', '1', '2', '2', '1', '1', '2', '4', '2', '2', '3', '1', '2', '1', '1', '2', '3', '2', '2', '1', '2', '2', '2', '2', '2', '3', '2', '2', '2', '3', '2', '3', '2', '2', '1', '2', '2', '2', '1', '2', '2', '1', '2', '2', '2', '1', '3', '2', '2', '2', '2', '2', '2', '3', '2', '3', '3', '2', '3', '2', '3', '2', '2', '3', '1', '2', '3', '3', '3', '3', '3', '3', '2', '2', '2', '3', '2', '3', '3', '2', '2', '2', '2'] ['1000', '1150', '1060', '1790', '803', '1640', '1250', '1588', '1000', '1465', '1500', '1418', '1350', '1000', '1250', '857', '753', '1186', '1250', '770', '2150', '1900', '1460', '1885', '1950', '1250', '1820', '2060', '740', '331', '310', '610', '1160', '665', '1052', '685', '400', '332', '776', '800', '450', '430', '675', '1870', '820', '850', '1172', '404', '575', '507', '558', '680', '1130', '562', '535', '590', '792', '935', '632', '1044', '800', '1200', '1350', '950', '1001', '900', '800', '852', '1494', '700', '634', '1395', '930', '464', '580', '509', '990', '445', '469', '778', '850', '540', '1194', '898', '700', '600', '762', '1035', '800', '1300', '823', '1700', '1575', '700', '1540', '1150', '1310', '1000', '915', '1300', '600', '1000', '1300', '1516', '1800', '1100', '1620', '1600', '970', '1133', '925', '1875', '1050', '1640', '840', '800', '900', '650', '800', nan] [' Omkar Pandey', ' gokul', ' Thayumanavan', ' Prasanna', ' Akash Akash', ' av nath', ' Sangeeta Pillai', ' ganga k', ' Anjan K', ' Omesh Saraf', ' Mohd Hussain ', ' Rahul Jain', ' Gaurav Kumar GAURAV', ' Ancy', ' Anoop', ' geetha', ' Omkar Omkar', ' ruchika', ' jeswanth', ' vikas saxena', ' Ranju', ' tezal', ' shravan', ' Bhav', ' Nagananda', ' MD Fuzail', ' ashwanth', ' Partha Sarma', ' Shekar', ' Sashank Constructions', ' K SH', ' NIYATI', ' Sachin Kunder', ' Motashaw Motashaw', ' MONCY BHASKAR', ' Anam khan', ' Milind Desai', ' Jas Baljeet J', ' Gangaram Dhuri', ' Satyajit Satyajit', ' vasant ahir', ' Rajendra', ' Aditi Shah', ' Haroon mansuri', ' kishor kishor', ' Medha Naik', ' Anita', ' jagdish Dassani', ' Raghav Sharma', ' nandan lanjekar', ' amit chalke', ' Arun Iyer', ' Ritu', ' Khuzema Tajir', ' Dipti solanki', ' sujatha', ' Aysha Khan', ' DHRUV SHARMA', ' Mumtaz Nazim', ' Savith Raghavan Savith Raghavan', ' Yogesh Baskaran', ' K VINODH KUMAR', ' GURURAJAN BABU', ' Vijay Jaya', ' siva', ' Prasanna', ' M DINESH KANNA', ' Elango Rajendran', ' Dev', ' vinothan', ' Owner', ' M G S Jayamalathi', ' Mahalingam', ' Kads', ' YOGEESWARAN', ' Kuppuswamy Bj', ' Jayalakshmi', ' Vidhya vidhya', ' Sujith', ' sakthi', ' Santhosh', ' Santhosh Developers', ' R Premalatha', ' Jebaselvan Navaraj', ' Ramkumar Gopalsamy', ' ram', ' Faizal', ' Vimal Kumar S', ' Venugopal', ' PURUSHOTHAMAN', ' Hari', ' Sl Prasanna', ' Sonia', ' Ramana Murthy', ' s bhargavi', ' Lakshman', ' Devi', ' jitendra sai', ' Ram', ' vakada ramana', ' Ravindra Pamidi', ' Praveen', ' Sai Stylish', ' MADHU', ' Uma Chavali', ' TEJARAM THRI', ' Jagan', ' Rakesh Power', ' Maheshwari', ' RAMACHANDRA RAO', ' Jaswanth', ' Srikanth', ' Akshay Kumar', ' swamy m', ' lalitha dhulipala', ' Vision Properties', ' Honey Group', ' New Living Properties', ' podilapu simhachalam', ' Dinakar'] ['FloorGround out of 4', 'FloorGround out of 4', 'Floor4 out of 4', 'Floor14 out of 15', 'Floor2 out of 4', 'Floor1 out of 4', 'Floor3 out of 5', 'Basement out of 17', 'Floor3 out of 14', 'Floor2 out of 10', 'FloorGround out of 7', 'Floor16 out of 16', 'FloorGround out of 5', 'Floor1 out of 7', 'Floor2 out of 4', 'Floor2 out of 5', 'Floor3 out of 6', 'Floor1 out of 7', 'Floor1 out of 4', 'Floor6 out of 12', 'Floor9 out of 9', 'Floor1 out of 4', 'Floor2 out of 4', 'Floor5 out of 5', 'Floor2 out of 4', 'Floor1 out of 3', 'Floor13 out of 27', 'Floor5 out of 13', 'Floor3 out of 3', 'FloorGround out of 4', 'Floor8 out of 9', 'Floor1 out of 5', 'Floor5 out of 15', 'Floor4 out of 7', 'FloorGround out of 76', 'Floor4 out of 4', 'FloorGround out of 3', 'Floor1 out of 6', 'Floor5 out of 20', 'Floor6 out of 12', 'Floor7 out of 7', 'FloorGround out of 4', 'Floor1 out of 3', 'Floor21 out of 21', 'Floor3 out of 3', 'Floor7 out of 21', 'Floor5 out of 7', 'Floor5 out of 7', 'Floor4 out of 15', 'Floor9 out of 15', 'Floor17 out of 18', 'Floor5 out of 12', 'Floor7 out of 7', 'Floor15 out of 23', 'Floor3 out of 4', 'Floor2 out of 13', 'Floor2 out of 4', 'Floor1 out of 11', 'Floor1 out of 4', 'Floor1 out of 2', 'FloorGround out of 2', 'Floor2 out of 2', 'Floor1 out of 2', 'Floor1 out of 2', 'Floor1 out of 2', 'Floor4 out of 4', 'Floor3 out of 5', 'Floor16 out of 16', 'Floor3 out of 4', 'Floor1 out of 2', 'Floor4 out of 4', 'Floor3 out of 4', 'Floor1 out of 2', 'Floor2 out of 4', 'Floor2 out of 2', 'Floor2 out of 3', 'Floor2 out of 3', 'Floor1 out of 4', 'Floor1 out of 3', 'Floor1 out of 3', 'Floor1 out of 1', 'FloorGround out of 2', 'Floor1 out of 2', 'Floor1 out of 1', 'Floor1 out of 2', 'Floor1 out of 1', 'Floor1 out of 4', 'Floor1 out of 4', 'Floor2 out of 5', 'Floor10 out of 16', 'Floor15 out of 16', 'FloorGround out of 6', 'Floor2 out of 3', 'Floor3 out of 5', 'Floor4 out of 4', 'Floor1 out of 5', 'Floor2 out of 4', 'Floor2 out of 5', 'Floor2 out of 5', 'Basement out of 3', 'Floor2 out of 5', 'Floor3 out of 5', 'Floor1 out of 5', 'Floor2 out of 5', 'Floor1 out of 5', 'Floor5 out of 5', 'Floor1 out of 5', 'Floor2 out of 4', 'Floor2 out of 10', 'Floor4 out of 5', 'Floor2 out of 3', 'Floor1 out of 5', 'Floor4 out of 5', 'Floor2 out of 5', 'Floor1 out of 5', 'Floor3 out of 5', 'Floor5 out of 5', 'Floor3 out of 5', nan, nan] [' Kattigenahalli', ' Ayodaya Nagar', ' Electronic City ', ' Tumkur Road ', ' Kudlu Gate ', ' Hal Stage 2', ' Murugeshpalya', ' Begur Road ', ' Judicial Layout', ' Bellandur', ' Kasturi Nagar ', ' Koramangala Block 1 ', ' Sarjapur Road ', ' Devanahalli ', ' Hanumantha Nagar', ' ', ' Doddakannelli ', ' Electronic City ', ' Sarjapur Road ', ' Jigani ', ' ', ' ', ' Jaya Nagar Block 3 ', ' whitefield ', ' whitefield ', ' Harlur ', ' Sarjapur Road ', ' Hoodi', ' Haralur Ma', ' Yelahanka ', ' Daulat Nagar ', ' Samel Pada ', ' Borivali West ', ' Goregaon West ', ' Vasai East ', ' Worli ', ' Kurla East ', ' Andheri East ', ' Kandivali East ', ' Mahim West ', ' Borivali West ', ' Chembur West ', ' ', ' ', ' Dadar West ', ' Boisar West ', ' Mira Road ', ' K', ' Virar West ', ' Vasai West ', ' Bhandup West ', ' Kalyan ', ' Goregaon East ', ' Marol Maroshi Road ', ' Malad West ', ' Vikhroli East ', ' Juhu ', ' Andheri East ', ' Jogeshwari West ', ' Mulund West ', ' Mannivakkam Chennai', ' New Perungalathur Chennai', ' Kovilambakkam Chennai', ' Perumbakkam Chennai', ' Mogappair Chennai', ' Tambaram West Tambaram Chennai', ' Manapakkam Chennai', ' Mogappair Chennai', ' Perumbakkam Chennai', ' Iyyappanthangal Chennai', ' Manapakkam Chennai', ' Madipakkam Chennai', ' Selaiyur Chennai', ' Velachery Chennai', ' Sithalapakkam Chennai', ' Adyar Chennai', ' Shankar Nagar Pammal Chennai', ' Chennai', ' Porur Chennai', ' Chennai', ' Villivakkam Chennai', ' Kundrathur Chennai', ' AGS Colony Velachery Chennai', ' Pallikaranai Chennai', ' Pozhichalur Chennai', ' Jothi Nagar Chitlapakkam Chennai', ' VGP Layout Palavakkam Chennai', ' Perundevi Ammal Nagar Chennai', ' Gu', ' Chennai', ' ', ' ', ' Madhurwada ', ' Pendurthi ', ' Pithapuram Colony ', ' Madhurwada ', ' Pendurthi ', ' Visalakshi Nagar ', ' Balaji Nagar ', ' Sujatha nagar ', ' ', ' Mural', ' Poth', ' Kurmannapalem ', ' Kurmannapalem ', ' Pendurthi ', ' Kurmannapalem ', ' ', ' ', ' Kancharapalem ', ' Kurmannapalem ', ' Railway New Colony ', ' Gajuwaka ', ' Aganampudi ', ' Madhurwada ', ' Tagarapuvalsa ', ' Parawada ', ' PM Palem ', ' Madhurwada ', ' MVP Colony '] ['bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'bangalore', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'mumbai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'chennai', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam', 'visakhapatnam'] 120 120 120 120 120 120 120 120
# inserting NaN's at index positions
per_sqt.insert(32,np.nan)
per_sqt.insert(39,np.nan)
per_sqt.insert(43,np.nan)
per_sqt.insert(60,np.nan)
per_sqt.insert(67,np.nan)
per_sqt.append(np.nan)
per_sqt.append(np.nan)
per_sqt.append(np.nan)
per_sqt.append(np.nan)
per_sqt.append(np.nan)
# inserting NaN's at index positions
Floor.append(np.nan)
Floor.append(np.nan)
# inserting NaN's at index positions
Carpet_Area.append(np.nan)
# Creation the Dict with cloumns names as strings and values as column names in which values are present.
df1 = {'City':City,
'Area':Area,
'Owner':Owner,
'BHK':BHK,
'Floor':Floor,
'price':price,
'Carpet_Area':Carpet_Area,
'per_sqt':per_sqt
}
Project1 = pd.DataFrame(df1)
Project1.head(20)
| City | Area | Owner | BHK | Floor | price | Carpet_Area | per_sqt | |
|---|---|---|---|---|---|---|---|---|
| 0 | bangalore | Kattigenahalli | Omkar Pandey | 3 | FloorGround out of 4 | ₹60 Lac | 1000 | ₹4950 |
| 1 | bangalore | Ayodaya Nagar | gokul | 2 | FloorGround out of 4 | ₹80 Lac | 1150 | ₹6957 |
| 2 | bangalore | Electronic City | Thayumanavan | 2 | Floor4 out of 4 | ₹48 Lac | 1060 | ₹4528 |
| 3 | bangalore | Tumkur Road | Prasanna | 3 | Floor14 out of 15 | ₹1.35 Cr | 1790 | ₹7542 |
| 4 | bangalore | Kudlu Gate | Akash Akash | 2 | Floor2 out of 4 | ₹61 Lac | 803 | ₹5706 |
| 5 | bangalore | Hal Stage 2 | av nath | 3 | Floor1 out of 4 | ₹1.30 Cr | 1640 | ₹7303 |
| 6 | bangalore | Murugeshpalya | Sangeeta Pillai | 2 | Floor3 out of 5 | ₹50 Lac | 1250 | ₹5000 |
| 7 | bangalore | Begur Road | ganga k | 3 | Basement out of 17 | ₹1.80 Cr | 1588 | ₹11335 |
| 8 | bangalore | Judicial Layout | Anjan K | 3 | Floor3 out of 14 | ₹1 Cr | 1000 | ₹8889 |
| 9 | bangalore | Bellandur | Omesh Saraf | 2 | Floor2 out of 10 | ₹1.02 Cr | 1465 | ₹6997 |
| 10 | bangalore | Kasturi Nagar | Mohd Hussain | 3 | FloorGround out of 7 | ₹1.45 Cr | 1500 | ₹8146 |
| 11 | bangalore | Koramangala Block 1 | Rahul Jain | 3 | Floor16 out of 16 | ₹3.85 Cr | 1418 | ₹20632 |
| 12 | bangalore | Sarjapur Road | Gaurav Kumar GAURAV | 3 | FloorGround out of 5 | ₹70 Lac | 1350 | ₹5655 |
| 13 | bangalore | Devanahalli | Ancy | 2 | Floor1 out of 7 | ₹85 Lac | 1000 | ₹6071 |
| 14 | bangalore | Hanumantha Nagar | Anoop | 2 | Floor2 out of 4 | ₹98 Lac | 1250 | ₹9800 |
| 15 | bangalore | geetha | 2 | Floor2 out of 5 | ₹60 Lac | 857 | ₹6367 | |
| 16 | bangalore | Doddakannelli | Omkar Omkar | 2 | Floor3 out of 6 | ₹78 Lac | 753 | ₹5158 |
| 17 | bangalore | Electronic City | ruchika | 2 | Floor1 out of 7 | ₹57 Lac | 1186 | ₹6374 |
| 18 | bangalore | Sarjapur Road | jeswanth | 3 | Floor1 out of 4 | ₹1.08 Cr | 1250 | ₹4553 |
| 19 | bangalore | Jigani | vikas saxena | 3 | Floor6 out of 12 | ₹57 Lac | 770 | ₹4545 |
# conversion of DataFrame to csv
Project1.to_csv('DF1.csv')
# Code for the collection of the data
# here list of 3 cities are taken and collection of data is done for them
cities = ['ranchi','haridwar','vadodara']
price=[]
per_sqt=[]
BHK=[]
Carpet_Area=[]
Owner=[]
Floor=[]
Area=[]
City=[]
for j in cities:
url = "https://www.magicbricks.com/flats-in-"+j+"-for-sale-pppfs"
print(url)
page = requests.get(url)
soup = BeautifulSoup(page.text)
for i in soup.find_all("div",class_="mb-srp__card__price--amount"):
price.append(i.text)
for i in soup.find_all("div",class_="mb-srp__card__price--size"):
per_sqt.append(i.text.split(' ')[0])
for i in soup.find_all("h2",class_="mb-srp__card--title"):
BHK.append(i.text[0])
for i in soup.find_all("div",class_="mb-srp__card__summary--value"):
a =i.text
b = re.findall(r"(\d+)\ssqft",str(a))
if b:
Carpet_Area.append(b[0])
for i in soup.find_all('div',class_='mb-srp__card__ads--name'):
Owner.append(i.text.split(':')[1])
for i in soup.find_all("div",class_="mb-srp__card__summary__list--item"):
a = i.text
b = re.findall(r"\w+\s\w+\s\w+\s\d+",a)
if b:
Floor.append(b[0])
for i in soup.find_all("h2",class_="mb-srp__card--title"):
Area.append(i.text.split('in')[1].split(',')[0])
City.append(j)
print(i,'---->', len(Owner))
print(i,'---->', len(Floor))
print(i,'---->', len(per_sqt))
https://www.magicbricks.com/flats-in-ranchi-for-sale-pppfs <h2 class="mb-srp__card--title">3 BHK Flat for Sale in Hawai Nagar , Ranchi</h2> ----> 30 <h2 class="mb-srp__card--title">3 BHK Flat for Sale in Hawai Nagar , Ranchi</h2> ----> 30 <h2 class="mb-srp__card--title">3 BHK Flat for Sale in Hawai Nagar , Ranchi</h2> ----> 28 https://www.magicbricks.com/flats-in-haridwar-for-sale-pppfs <h2 class="mb-srp__card--title">1 BHK Flat for Sale in Sidcul , Haridwar</h2> ----> 60 <h2 class="mb-srp__card--title">1 BHK Flat for Sale in Sidcul , Haridwar</h2> ----> 60 <h2 class="mb-srp__card--title">1 BHK Flat for Sale in Sidcul , Haridwar</h2> ----> 57 https://www.magicbricks.com/flats-in-vadodara-for-sale-pppfs <h2 class="mb-srp__card--title">4 BHK Flat for Sale in , Vadodara</h2> ----> 90 <h2 class="mb-srp__card--title">4 BHK Flat for Sale in , Vadodara</h2> ----> 89 <h2 class="mb-srp__card--title">4 BHK Flat for Sale in , Vadodara</h2> ----> 83
# inserting NaN's at index positions
Floor.append(np.nan)
Carpet_Area.append(np.nan)
# inserting NaN's at index positions
per_sqt.insert(11,np.nan)
per_sqt.insert(25,np.nan)
per_sqt.insert(53,np.nan)
per_sqt.insert(71,np.nan)
per_sqt.insert(72,np.nan)
per_sqt.insert(79,np.nan)
per_sqt.insert(84,np.nan)
# printing to see the values are obtained are not
print(price)
print(per_sqt)
print(BHK)
print(Carpet_Area)
print(Owner)
print(Floor)
print(Area)
print(City)
# Finding the length to see is all the lengths are same
print(len(price))
print(len(per_sqt))
print(len(BHK))
print(len(Carpet_Area))
print(len(Owner))
print(len(Floor))
print(len(Area))
print(len(City))
['₹62 Lac ', '₹76.4 Lac ', '₹77 Lac ', '₹31.5 Lac ', '₹68.6 Lac ', '₹75 Lac ', '₹40 Lac ', '₹58 Lac ', '₹60 Lac ', '₹56 Lac ', '₹51 Lac ', '₹77 Lac ', '₹31 Lac ', '₹33 Lac ', '₹1.15 Cr ', '₹68 Lac ', '₹82 Lac ', '₹32 Lac ', '₹58 Lac ', '₹45 Lac ', '₹45 Lac ', '₹45 Lac ', '₹1.25 Cr ', '₹40 Lac ', '₹48 Lac ', '₹1.05 Cr ', '₹60 Lac ', '₹91 Lac ', '₹48 Lac ', '₹1.25 Cr ', '₹41 Lac ', '₹77 Lac ', '₹23.5 Lac ', '₹58 Lac ', '₹14.3 Lac ', '₹1.90 Cr ', '₹38 Lac ', '₹32 Lac ', '₹50.5 Lac ', '₹47 Lac ', '₹28 Lac ', '₹55 Lac ', '₹40 Lac ', '₹35 Lac ', '₹48 Lac ', '₹58 Lac ', '₹16.5 Lac ', '₹25 Lac ', '₹20 Lac ', '₹16 Lac ', '₹67 Lac ', '₹25 Lac ', '₹28 Lac ', '₹37 Lac ', '₹40 Lac ', '₹66 Lac ', '₹14 Lac ', '₹34 Lac ', '₹65 Lac ', '₹22 Lac ', '₹60 Lac ', '₹80 Lac ', '₹22 Lac ', '₹31 Lac ', '₹25 Lac ', '₹23 Lac ', '₹15.5 Lac ', '₹45 Lac ', '₹79 Lac ', '₹45 Lac ', '₹30 Lac ', '₹7.5 Lac ', '₹45 Lac ', '₹35 Lac ', '₹65 Lac ', '₹47 Lac ', '₹19.5 Lac ', '₹25 Lac ', '₹31 Lac ', '₹35 Lac ', '₹25 Lac ', '₹90 Lac ', '₹50 Lac ', '₹82 Lac ', '₹19 Lac ', '₹60 Lac ', '₹8 Lac ', '₹27 Lac ', '₹42 Lac ', '₹1.20 Cr '] ['₹4655', '₹4200', '₹5325', '₹5497', '₹4400', '₹3731', '₹3774', '₹4531', '₹4317', '₹3733', '₹3682', nan, '₹2897', '₹3986', '₹5476', '₹5207', '₹5325', '₹2712', '₹4629', '₹4110', '₹4039', '₹4274', '₹5556', '₹3448', '₹3840', nan, '₹4484', '₹5170', '₹4800', '₹8333', '₹2522', '₹4529', '₹2901', '₹5273', '₹2192', '₹5429', '₹3140', '₹2960', '₹3483', '₹3167', '₹2605', '₹3963', '₹3636', '₹3500', '₹3967', '₹4531', '₹3113', '₹3846', '₹2857', '₹2133', '₹4295', '₹3125', '₹3230', nan, '₹3687', '₹3667', '₹2318', '₹3716', '₹3824', '₹2933', '₹2953', '₹5333', '₹2444', '₹3875', '₹2941', '₹3286', '₹2053', '₹2970', '₹4158', '₹2093', '₹2542', nan, nan, '₹3182', '₹3683', '₹3298', '₹1696', '₹2551', '₹2818', nan, '₹2174', '₹3817', '₹5556', '₹3216', nan, '₹2521', '₹4000', '₹3000', '₹2545', '₹4615'] ['3', '3', '3', '1', '3', '3', '2', '3', '3', '3', '3', '3', '2', '2', '3', '3', '3', '2', '2', '2', '2', '3', '4', '2', '3', '5', '3', '3', '2', '3', '2', '3', '2', '3', '1', '2', '3', '2', '2', '3', '2', '3', '2', '2', '3', '2', '1', '1', '1', '1', '4', '2', '1', '2', '2', '3', '1', '2', '3', '1', '3', '3', '2', '2', '2', '1', '1', '2', '3', '4', '2', '1', '2', '2', '3', '3', '2', '2', '2', '2', '2', '3', '3', '3', '2', '4', '1', '2', '3', '4'] ['1110', '1500', '1150', '573', '1560', '2010', '930', '1280', '1250', '1500', '1385', '1212', '1000', '800', '1925', '1306', '1540', '1180', '971', '950', '1114', '1053', '2250', '950', '1150', '1650', '1338', '1760', '968', '1500', '1490', '750', '1100', '650', '3000', '1210', '719', '1450', '1484', '1075', '1388', '1025', '890', '1050', '500', '530', '650', '700', '650', '1600', '800', '867', '1450', '900', '1800', '604', '915', '1700', '750', '2032', '1500', '900', '780', '1208', '700', '755', '1515', '1900', '1465', '1080', '450', '1010', '850', '1016', '1025', '800', '980', '800', '640', '1150', '2358', '900', '2350', '750', '1580', '200', '600', '1650', '2600', nan] [' Waseem Ahmad', ' Anuj Krchandra', ' Rabindra Bakshi', ' Abhishek Gupta', ' Shree Ram Developers', ' vivek chandra', ' sushant Bhattacharjee', ' Ritesh Nagpal', ' Nitin Kumar', ' PARTHO BOSE', ' Pradeep', ' rajesh singh', ' ashish singh', ' sanjeev', ' Kshav', ' Abhishek', ' Neeraj', ' Manit Kumar', ' Mukesh Mukesh', ' Samita Chatterjee', ' shakuntala verma', ' vikash singh', ' Avishek Modi Modi', ' Amar nath Pandey', ' MANISH KUMAR', ' taha', ' Rabish Kumar', ' Pintu Singh', ' Ravi Prasad', ' Rajiv Ranjan', ' Kiran Kumar Patel', ' SHREYA SAGAR', ' Krrish', ' Rajendra Wadhwa', ' Shakti Verma', ' Mridul Puri', ' Manish', ' Ajay', ' tarun agarwal', ' Arvind Bhardwaj', ' Jasjit Pannu', ' Abhishek Chauhan', ' sandeep', ' "Magicbricks User"', ' Dinesh Kukreti', ' Shalinirahal Banga', ' Alok Singh', ' saba b Khan', ' Rahul kumar', ' SANDEEP KUMAR', ' Sandeep Chaturvedi', ' AYUSH SAXENA', ' puneet', ' Abhishek chaudhary', ' Rishabh Kapoor', ' Surendra Satija', ' Harendra Kumar', ' chandra', ' Rohit', ' Dharmveer', ' Samy', ' Sudhanshu Kavthekar', ' Jaimin Parmar ', ' paresh', ' Mayur Patel', ' Parimal Patel', ' Rakesh', ' Bhavik Bhavik Mehta', ' Jyotsna Khoda', ' raghuvirsinh Vaghela', ' Pratik gosaliya', ' Umakant parmar', ' Akshat Dani', ' Nimisha', ' Hitansh Agrawal', ' anil', ' mitul patel', ' anil makvana', ' Manubhai Shah', ' Shashank', ' Pushpita Roy Choudhury', ' namita Namita', ' Deepak Singh', ' Manish Nagarajan', ' Ankur Javia', ' Jigar', ' Maheshbhai Dhokia', ' Aniruddha', ' Bindi shah', ' Jiten chokshi'] ['Floor3 out of 4', 'Floor5 out of 6', 'Floor4 out of 4', 'Floor2 out of 4', 'Floor8 out of 12', 'Floor2 out of 4', 'Floor1 out of 4', 'Floor2 out of 7', 'Floor4 out of 4', 'Floor1 out of 1', 'Floor3 out of 4', 'Floor4 out of 6', 'Floor1 out of 4', 'Floor3 out of 3', 'Floor9 out of 10', 'Floor4 out of 4', 'Floor4 out of 5', 'Floor3 out of 4', 'FloorGround out of 6', 'Floor3 out of 4', 'Floor3 out of 3', 'Floor1 out of 11', 'Floor3 out of 4', 'Floor3 out of 3', 'Floor2 out of 3', 'FloorGround out of 8', 'Floor1 out of 4', 'Floor4 out of 4', 'Floor1 out of 4', 'Floor4 out of 8', 'FloorGround out of 4', 'FloorGround out of 7', 'Floor6 out of 8', 'Floor1 out of 5', 'Floor1 out of 4', 'FloorGround out of 2', 'Floor2 out of 7', 'Floor3 out of 7', 'Floor6 out of 6', 'Floor1 out of 1', 'Floor4 out of 6', 'Floor2 out of 5', 'FloorGround out of 4', 'Floor3 out of 7', 'Floor5 out of 7', 'FloorGround out of 1', 'FloorGround out of 3', 'FloorGround out of 4', 'Floor3 out of 4', 'Floor1 out of 4', 'Floor2 out of 4', 'Floor3 out of 3', 'Floor4 out of 4', 'Floor3 out of 3', 'Floor5 out of 12', 'Floor4 out of 5', 'Floor2 out of 2', 'Floor3 out of 7', 'Floor2 out of 7', 'Floor4 out of 6', 'Floor5 out of 5', 'Floor3 out of 4', 'Floor3 out of 4', 'Floor1 out of 3', 'Floor5 out of 5', 'Floor2 out of 4', 'Floor6 out of 8', 'Floor3 out of 4', 'Floor4 out of 5', 'Floor1 out of 5', 'Floor3 out of 4', 'Floor1 out of 3', 'Floor3 out of 5', 'Floor5 out of 12', 'Floor4 out of 9', 'Floor2 out of 5', 'Floor2 out of 8', 'Floor1 out of 3', 'Floor4 out of 5', 'Floor4 out of 4', 'Floor4 out of 9', 'Floor5 out of 5', 'Floor9 out of 9', 'Floor3 out of 4', 'Floor5 out of 5', 'Floor2 out of 4', 'Floor1 out of 5', 'Floor3 out of 5', 'Floor2 out of 2', nan] [' H', ' ', ' S', ' Ratu Road ', ' Harihar S', ' Bariatu Road ', ' Morabadi ', ' Bahu Bazar ', ' ', ' Vikas Nagar ', ' Bariatu ', ' Hesag ', ' ', ' Barga', ' Sector 2 Masibari ', ' Upper Chutia ', ' Hazaribag Road ', ' Namkum ', ' Lalpur ', ' Hesag ', ' Kokar ', ' BIT Mesra ', ' ', ' Harihar S', ' H', ' ', ' Namkum ', ' ', ' Bariatu Road ', ' Hawai Nagar ', ' Sidcul ', ' Roshanabad ', ' Sidcul ', ' Devpura ', ' Laksar Road ', ' Arya Nagar ', ' Jwalapur ', ' Roshanabad ', ' Patanjali ', ' Rajaji National Park ', ' Roshanabad ', ' Jwalapur ', ' Jwalapur ', ' Jwalapur ', ' ', ' Roorkee ', ' Patanjali ', ' ', ' Shivalik Nagar ', ' Shantikunj ', ' Haripur Kalan ', ' ', ' Kankhal ', ' Jwalapur ', ' Patanjali ', ' NH-58 ', ' Jwalapur ', ' Sidcul ', ' NH-58 ', ' Sidcul ', ' Vasna Bhayli Ma', ' Race Course circle ', ' Ajwa Road ', ' ', ' Makar Pura ', ' Chhani ', ' Suryanagar ', ' Gotri Road Gotri ', ' Vasant Vihar ', ' Sevasi ', ' Chhani ', ' Nani Bapod ', ' Race Course circle ', ' Kalali ', ' Sama Savli Road ', ' Gorwa ', ' Bill ', ' Vasna Bhayli Ma', ' Subhanpura ', ' Bhayli ', ' Danteshwar ', ' Vasna Bhayli Ma', ' Gotri Sevasi Road ', ' Harni Road ', ' Sayajipura ', ' Bill ', ' ', ' Vasna Bhayli Ma', ' ', ' '] ['ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'ranchi', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'haridwar', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara', 'vadodara'] 90 90 90 90 90 90 90 90
# Creation the Dict with cloumns names as strings and values as column names in which values are present.
df2 = {'City':City,
'Area':Area,
'Owner':Owner,
'BHK':BHK,
'Floor':Floor,
'price':price,
'Carpet_Area':Carpet_Area,
'per_sqt':per_sqt
}
# Creation of DataFrame
Project2=pd.DataFrame(df2)
Project2.head(20)
| City | Area | Owner | BHK | Floor | price | Carpet_Area | per_sqt | |
|---|---|---|---|---|---|---|---|---|
| 0 | ranchi | H | Waseem Ahmad | 3 | Floor3 out of 4 | ₹62 Lac | 1110 | ₹4655 |
| 1 | ranchi | Anuj Krchandra | 3 | Floor5 out of 6 | ₹76.4 Lac | 1500 | ₹4200 | |
| 2 | ranchi | S | Rabindra Bakshi | 3 | Floor4 out of 4 | ₹77 Lac | 1150 | ₹5325 |
| 3 | ranchi | Ratu Road | Abhishek Gupta | 1 | Floor2 out of 4 | ₹31.5 Lac | 573 | ₹5497 |
| 4 | ranchi | Harihar S | Shree Ram Developers | 3 | Floor8 out of 12 | ₹68.6 Lac | 1560 | ₹4400 |
| 5 | ranchi | Bariatu Road | vivek chandra | 3 | Floor2 out of 4 | ₹75 Lac | 2010 | ₹3731 |
| 6 | ranchi | Morabadi | sushant Bhattacharjee | 2 | Floor1 out of 4 | ₹40 Lac | 930 | ₹3774 |
| 7 | ranchi | Bahu Bazar | Ritesh Nagpal | 3 | Floor2 out of 7 | ₹58 Lac | 1280 | ₹4531 |
| 8 | ranchi | Nitin Kumar | 3 | Floor4 out of 4 | ₹60 Lac | 1250 | ₹4317 | |
| 9 | ranchi | Vikas Nagar | PARTHO BOSE | 3 | Floor1 out of 1 | ₹56 Lac | 1500 | ₹3733 |
| 10 | ranchi | Bariatu | Pradeep | 3 | Floor3 out of 4 | ₹51 Lac | 1385 | ₹3682 |
| 11 | ranchi | Hesag | rajesh singh | 3 | Floor4 out of 6 | ₹77 Lac | 1212 | NaN |
| 12 | ranchi | ashish singh | 2 | Floor1 out of 4 | ₹31 Lac | 1000 | ₹2897 | |
| 13 | ranchi | Barga | sanjeev | 2 | Floor3 out of 3 | ₹33 Lac | 800 | ₹3986 |
| 14 | ranchi | Sector 2 Masibari | Kshav | 3 | Floor9 out of 10 | ₹1.15 Cr | 1925 | ₹5476 |
| 15 | ranchi | Upper Chutia | Abhishek | 3 | Floor4 out of 4 | ₹68 Lac | 1306 | ₹5207 |
| 16 | ranchi | Hazaribag Road | Neeraj | 3 | Floor4 out of 5 | ₹82 Lac | 1540 | ₹5325 |
| 17 | ranchi | Namkum | Manit Kumar | 2 | Floor3 out of 4 | ₹32 Lac | 1180 | ₹2712 |
| 18 | ranchi | Lalpur | Mukesh Mukesh | 2 | FloorGround out of 6 | ₹58 Lac | 971 | ₹4629 |
| 19 | ranchi | Hesag | Samita Chatterjee | 2 | Floor3 out of 4 | ₹45 Lac | 950 | ₹4110 |
# conversion of DataFrame to csv
Project2.to_csv('DF2.csv')
# Code for the collection of the data
# here list of 7 cities are taken and collection of data is done for them
cities = ['greater-noida','gurgaon','mangalore','raipur','agra','bhiwadi','mysore']
City = []
Area=[]
Owner=[]
BHK=[]
Floor=[]
price=[]
Carpet_Area=[]
per_sqt=[]
for j in cities:
url = "https://www.magicbricks.com/flats-in-"+j+"-for-sale-pppfs"
print(url)
page = requests.get(url)
soup = BeautifulSoup(page.text)
for i in soup.find_all("div",class_="mb-srp__card__price--amount"):
price.append(i.text)
for i in soup.find_all("div",class_="mb-srp__card__price--size"):
per_sqt.append(i.text.split(' ')[0])
for i in soup.find_all("h2",class_="mb-srp__card--title"):
BHK.append(i.text[0])
for i in soup.find_all("div",class_="mb-srp__card__summary--value"):
a =i.text
b = re.findall(r"(\d+)\ssqft",str(a))
if b:
Carpet_Area.append(b[0])
for i in soup.find_all('div',class_='mb-srp__card__ads--name'):
Owner.append(i.text.split(':')[1])
for i in soup.find_all("div",class_="mb-srp__card__summary__list--item"):
a = i.text
b = re.findall(r"\w+\s\w+\s\w+\s\d+",a)
if b:
Floor.append(b[0])
for i in soup.find_all("h2",class_="mb-srp__card--title"):
Area.append(i.text.split('in')[1].split(',')[0])
City.append(j)
print(i,'---->', len(Owner))
print(i,'---->', len(Floor))
print(i,'---->', len(per_sqt))
https://www.magicbricks.com/flats-in-greater-noida-for-sale-pppfs <h2 class="mb-srp__card--title">3 BHK Flat for Sale in Yamuna Expressway , Greater Noida</h2> ----> 29 <h2 class="mb-srp__card--title">3 BHK Flat for Sale in Yamuna Expressway , Greater Noida</h2> ----> 31 <h2 class="mb-srp__card--title">3 BHK Flat for Sale in Yamuna Expressway , Greater Noida</h2> ----> 28 https://www.magicbricks.com/flats-in-gurgaon-for-sale-pppfs <h2 class="mb-srp__card--title">3 BHK Flat for Sale in Sector 65 , Gurgaon</h2> ----> 59 <h2 class="mb-srp__card--title">3 BHK Flat for Sale in Sector 65 , Gurgaon</h2> ----> 64 <h2 class="mb-srp__card--title">3 BHK Flat for Sale in Sector 65 , Gurgaon</h2> ----> 56 https://www.magicbricks.com/flats-in-mangalore-for-sale-pppfs <h2 class="mb-srp__card--title">2 BHK Flat for Sale in Yeyyadi Indl. Estate , Mangalore</h2> ----> 89 <h2 class="mb-srp__card--title">2 BHK Flat for Sale in Yeyyadi Indl. Estate , Mangalore</h2> ----> 93 <h2 class="mb-srp__card--title">2 BHK Flat for Sale in Yeyyadi Indl. Estate , Mangalore</h2> ----> 82 https://www.magicbricks.com/flats-in-raipur-for-sale-pppfs <h2 class="mb-srp__card--title">2 BHK Flat for Sale in Daldal Seoni , Raipur</h2> ----> 119 <h2 class="mb-srp__card--title">2 BHK Flat for Sale in Daldal Seoni , Raipur</h2> ----> 123 <h2 class="mb-srp__card--title">2 BHK Flat for Sale in Daldal Seoni , Raipur</h2> ----> 112 https://www.magicbricks.com/flats-in-agra-for-sale-pppfs <h2 class="mb-srp__card--title">3 BHK Flat for Sale in Rakabganj , Agra</h2> ----> 149 <h2 class="mb-srp__card--title">3 BHK Flat for Sale in Rakabganj , Agra</h2> ----> 153 <h2 class="mb-srp__card--title">3 BHK Flat for Sale in Rakabganj , Agra</h2> ----> 142 https://www.magicbricks.com/flats-in-bhiwadi-for-sale-pppfs <h2 class="mb-srp__card--title">2 BHK Flat for Sale in Alwar Bypass Road , Bhiwadi</h2> ----> 179 <h2 class="mb-srp__card--title">2 BHK Flat for Sale in Alwar Bypass Road , Bhiwadi</h2> ----> 184 <h2 class="mb-srp__card--title">2 BHK Flat for Sale in Alwar Bypass Road , Bhiwadi</h2> ----> 172 https://www.magicbricks.com/flats-in-mysore-for-sale-pppfs <h2 class="mb-srp__card--title">2 BHK Flat for Sale in Sriramapura , Mysore</h2> ----> 209 <h2 class="mb-srp__card--title">2 BHK Flat for Sale in Sriramapura , Mysore</h2> ----> 213 <h2 class="mb-srp__card--title">2 BHK Flat for Sale in Sriramapura , Mysore</h2> ----> 198
# printing to see the values are obtained are not
print(City)
print(Area)
print(BHK)
print(Carpet_Area)
print(Owner)
print(price)
print(per_sqt)
print(Floor)
# Finding the length to see is all the lengths are same
print(len(City))
print(len(Area))
print(len(BHK))
print(len(Carpet_Area))
print(len(Owner))
print(len(price))
print(len(per_sqt))
print(len(Floor))
['greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'greater-noida', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'gurgaon', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'mangalore', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'raipur', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'agra', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'bhiwadi', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore', 'mysore'] [' Zeta 1 ', ' Sector P4 ', ' Eta 2', ' Sector 3 ', ' Zeta 1 ', ' Omicron ', ' Greater Noida West ', ' Eta 2', ' Yamuna Expressway ', ' Greater Noida West ', ' ', ' Chi 5 ', ' Jaypee Greens ', ' Zeta 1 ', ' Greater Noida West ', ' Omicron 1 ', ' Greater Noida West ', ' Sector 1 Greater Noida West ', ' Greater Noida West ', ' Greater Noida West ', ' Greater Noida West ', ' ', ' Noida Extention ', ' Sector 1 Greater Noida West ', ' Omicron 3 ', ' UPSIDC Site C Block H ', ' Sector 16 ', ' Chi 5 ', ' Yamuna Expressway ', ' Yamuna Expressway ', ' Jal Vayu Vihar', ' Sohna Sector 32 ', ' Kendriya Vihar', ' Sector 23 ', ' Sohna Sector 35 ', ' Sector 84 ', ' Sector 104 ', ' Sector 78 ', ' Sector 112 ', ' Sector 102 ', ' Sector 48 ', ' Sector 7 ', ' Sector 55 ', ' DLF City Phase 1 ', ' Sector 57 ', ' New Colony', ' Sohna Sector 33 ', ' Sector 83 ', ' Sector 47 ', ' Sector 37C', ' Sector 9 ', ' DLF City Phase 1 ', ' Sector 4 ', ' South City 2', ' New Palam Vihar', ' Sector 54 ', ' Patel Nagar', ' Sector 77', ' Sector 65 ', ' Sector 65 ', ' Shakti Nagar ', ' Urwa ', ' Bunts Hostel Road ', ' Urwa ', ' Kulshekar ', ' Kulai ', ' Bajpe ', ' Ujire ', ' Bondel ', ' Kateel ', ' K', ' Shakti Nagar ', ' Falnir ', ' Kulai ', ' Kadri ', ' Padil ', ' Kulai ', ' Urwa ', ' Falnir ', ' Jepp', ' Derebail ', ' Mulky ', ' ', ' Pandeshwar ', ' Ashok Nagar ', ' Konchady Cross Road ', ' Pumpwell ', ' Mukka ', ' Nandigudda ', ' Yeyyadi Indl. Estate ', ' Daldal Seoni ', ' Mowa ', ' Avanti Vihar ', ' Shankar Nagar ', ' Santoshi Nagar ', ' Naya Raipur ', ' Hirapur Road ', ' VIP Road ', ' Mathpurena ', ' Boria Kalan ', ' Amlidhi ', ' Bhatagaon ', ' Kabir Nagar ', ' Shankar Nagar ', ' Hirapur Road ', ' NH 6 ', ' Shankar Nagar ', ' Deopuri ', ' Kota ', ' Amlidhi ', ' Hirapur Road ', ' Santoshi Nagar ', ' Boria Kalan ', ' Shankar Nagar ', ' Amleshwar ', ' Amlidhi ', ' Kachna Road ', ' Mowa ', ' Amanaka ', ' Daldal Seoni ', ' Sikandra ', ' Vibhav Nagar ', ' Vijay Nagar Colony ', ' Civil L', ' Dayal Bagh ', ' Civil L', ' Shastri Puram ', ' Agra Shamshabad Raja Kherah Marg ', ' Dayal Bagh ', ' Dayal Bagh ', ' Dayal Bagh ', ' Fatehabad Road ', ' Agra Shamshabad Raja Kherah Marg ', ' Sikandra ', ' Sikandra ', ' Dayal Bagh ', ' Sikandra ', ' Fatehabad Road ', ' Shastri Puram ', ' Shastri Puram ', ' Fatehabad Road ', ' Sikandra ', ' Shahganj ', ' Fatehabad Road ', ' Gwalior Road ', ' Agra Shamshabad Raja Kherah Marg ', ' Sector 16B Awas Vikas Colony ', ' Fatehabad Road ', ' Shastri Puram ', ' Rakabganj ', ' Alwar Bypass Road ', ' ', ' Alwar Bypass Road ', ' Alwar Bypass Road ', ' Alwar Bypass Road ', ' Alwar Bypass Road ', ' Alwar Bypass Road ', ' Alwar Bypass Road ', ' Alwar Bypass Road ', ' Alwar Bypass Road ', ' Alwar Bypass Road ', ' Bhiwadi Mod ', ' Tapukara ', ' Alwar Bhiwadi Road ', ' Vasundhara Nagar ', ' Alwar Bypass Road ', ' Alwar Bypass Road ', ' Alwar Bypass Road ', ' Alwar Bypass Road ', ' Alwar Bypass Road ', ' Alwar Bypass Road ', ' Alwar Bypass Road ', ' Alwar Bypass Road ', ' Rampura ', ' Alwar Bypass Road ', ' Alwar Bypass Road ', ' Vasundhara Nagar ', ' Tapukara ', ' Alwar Bypass Road ', ' Alwar Bypass Road ', ' Gokulam ', ' Bogadi ', ' JP Nagar ', ' Yadavgiri ', ' Yadavgiri ', ' Hebbal ', ' Vijayanagar 4th Stage ', ' Bogadi ', ' Sharadadevi Nagar ', ' Alanahalli ', ' Vijaynagar 3rd Stage ', ' Belagola ', ' ', ' Visveshwara Nagar ', ' ', ' Siddhartha Layout ', ' Vidyarayanapuram ', ' Srirangapatnam ', ' JP Nagar ', ' Bogadi ', ' Chamarajapura ', ' Metagalli ', ' Jaya Laxmi Puram ', ' Hebbal 2nd Stage ', ' Lakshmipuram ', ' ', ' Kuvempunagar ', ' ', ' V V Mohalla ', ' Sriramapura '] ['2', '3', '3', '2', '2', '3', '4', '2', '1', '2', '2', '4', '4', '4', '2', '2', '3', '2', '3', '2', '2', '2', '3', '2', '3', '2', '3', '3', '2', '3', '2', '3', '3', '2', '1', '3', '2', '3', '3', '4', '3', '3', '3', '4', '3', '3', '2', '3', '3', '2', '4', '2', '4', '4', '2', '3', '3', '4', '3', '3', '2', '2', '3', '2', '3', '2', '4', ' ', '3', '2', '1', '2', '3', '2', '2', '3', '3', '2', '2', '2', '2', '2', '2', '2', '3', '2', '3', '2', '3', '2', '2', '3', '3', '2', '3', '2', '3', '2', '2', '2', '2', '2', '1', '2', '2', '3', '2', '3', '3', '2', '3', '3', '3', '3', '3', '2', '3', '2', '1', '2', '2', '3', '2', '2', '5', '2', '2', '3', '3', '3', '3', '2', '2', '2', '2', '3', '5', '3', '2', '2', '3', '3', '2', '3', '2', '2', '2', '3', '2', '3', '2', '2', '2', '1', '2', '2', '2', '2', '1', '1', '3', '1', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2', '3', '3', '2', '1', '2', '3', '4', '2', '2', '2', '1', '2', '3', '4', '2', '2', '4', '2', '2', '2', '2', '3', '3', '3', '3', '2', '3', '3', '2', '3', '2', '2', '2', '2', '2'] ['1200', '1120', '995', '970', '895', '1775', '2364', '660', nan, '950', '995', '3210', '3441', '2200', '910', '840', '1530', '862', '1055', '615', '1155', '950', '840', '870', '1395', '1115', '1285', '1494', '1115', '13750', '900', '1789', '1500', '750', '310', '1750', '570', '1657', '3763', '1800', '1300', '900', '1950', '1556', '1217', nan, '610', '1745', '2470', '10372', '1450', '1250', '1550', '1800', '550', '3650', '960', '1500', '1500', '3069', '670', '1100', '1700', '1275', '1330', '906', '3030', nan, '1390', '1260', '660', '1166', '3900', '975', '1390', '1070', '1870', '1040', '1000', '1320', '783', '72', '1000', '1330', '1356', '884', '1613', '1100', '1341', '1200', '970', '1200', '1985', '840', '1250', '943', '799', '900', '1050', '487', '700', '815', '505', '866', '721', '1200', '1030', '1500', '1805', '1230', '965', '1250', '1337', '960', '1200', '1000', '1500', '860', '550', '906', '900', '1540', '1250', '850', nan, '1050', '848', '1350', '1050', '1400', nan, '1192', '1115', '1000', '660', '1675', '2410', nan, '862', '1250', '1400', '1800', '1750', '1675', '960', '940', '1425', '926', '861', '1500', '1165', '555', '980', '466', '1234', '750', '780', '650', '630', '600', '1430', '660', '815', '1250', '1170', '1111', '1000', '1150', '829', '700', '950', '1250', '1150', '599', '750', '1590', '1522', '500', '630', '790', '1650', '1487', '1095', '9', '1280', '400', '1400', '1174', '650', '1050', '890', '2000', '911', '1280', '800', '1095', '1553', '1875', '1681', '1400', '1019', '2200', '1380', '1100', '1650', '1070', '1080', '1200', '900', '1115'] [' RAHUL TRIVEDI', ' R Bhattacharjee', ' Ashutosh', ' Sandeep gautam', ' vijay', ' PRASAD', ' Kamal Bansal', ' Avik Dey', ' Harkesh Sharma', ' Bhagat Singh', ' Mangat Rai', ' AN SINHA', ' Nihit', ' Shiv Kumar', ' Right Value Solutions', ' Adhunik Propmart', ' Homes And Desire', ' Investors Lab', ' Vardhman Reality', ' piyush nair', ' A Plus Associates', ' Bricksnwall Innovations Pvt.Ltd.', ' Pragati ', ' Prof Arora', ' Prabhat Sharma', ' Gaurav', ' sanjeev', ' Mohd Sarfaraz', ' Atif', ' Sandeep Jain', ' Mohit', ' Nithin Abraham', ' Arun Yadav', ' Lionel Richie', ' Shubham Solanki', ' Anshul', ' Ajay Sharma', ' Pushpendra Sethi', ' Y S Dwivedi', ' Rahul Arora', ' Devesh', ' RAHUL', ' Kapur', ' sanjay kaushik', ' Naresh', ' Rahul', ' Karun', ' Gurdeep Singh', ' Pradeep Sharma', ' Ravinder Chauhan', ' Axiom Landbase Pvt. Ltd.', ' Jk Batra', ' sunil yadav', ' Rishabh Arora', ' Pankaj Ahlawat', ' VIVEK SHARMA', ' Real Estate Gurgaon', ' Akhilesh Sharma ', ' Ankush', ' Manjunath S', ' Govinda', ' Varun Nayak', ' Jeanson Veigas', ' Chethan', ' Naveen Kamath', ' Devdhar Shetty', ' Gopal', ' Stany', ' Brahmari Vilas', ' Baptist Dcunha', ' Priya', ' Iqbal ', ' Santosh Babu Salian', ' Hassan', ' Rajesh Antony Fernandes', ' jaysen', ' Govinda Sharma', ' hashura', ' Lance', ' Arwin', ' sharath kumar', ' Sameer', ' Sabeena Farhath', ' Suman Kumar', ' pkumar', " James D'SA", ' roshan', ' Sunil Veigas', ' SNEHA', ' D Suryaprakash', ' Bhumika Bansal', ' carlus', ' Vaibhav Shukla', ' Khushboo Kashyap', ' Deekaha', ' Divya Singh', ' Arpit Jain', ' Mahesh', ' geeta shrivastava', ' ajit singh', ' Prem Pvt Ltd', ' vijay', ' Siddharth Buildcon', ' Kusum kumari', ' Priyesh Gupta', ' Devinder Singh', ' womesh womesh', ' sandip Agarwal', ' Chainika', ' Karnjeet Biswas', ' TEJESHWARI SAHU', ' Mitchell', ' Angad Lalwani', ' achyutananda', ' Manoj mati', ' vidyadhar behra', ' Sumit Wadhwani', ' Saurabh upadhyay', ' Shubham Soni', ' Vikas Gulati', ' vishal sharma', ' Arnav Singh', ' Vinod Kumar', ' Deepika Chahar', ' Shikha', ' Shubham Kumar', ' hemant sharma', ' Prem lal Taneja', ' Yatendra kumar mehrotra', ' Umang Mathur', ' Akshit Rajoriya', ' Sanjeev Singh', ' nitin singh', ' ajay kumar', ' Divya pandey', ' GALAXY PRINT PACK', ' SHWETANG SHARMA', ' PRAVEEN KUMAR', ' Sapna', ' Ganpati World', ' Shrey Shankar', ' Rakesh Agarwal ', ' Sanchit gupta', ' PRASHANT', ' adil hussain', ' Mohini Sharma', ' vivek jain', ' VIMAL TIWARI', ' Dr Archika Gupta Dr Archika', ' Amit Kumar Saini', ' Radha Kanha', ' M K Realtech', ' R K Real Estate', ' Vikram', ' Amita Singh', ' Raj Singh', ' Dharm Chauhan', ' Sunil Kumar', ' vineet', ' Sandeep singh', ' Ashok Sood', ' munish', ' Brahmprakash', ' Satish', ' Sachinrajput', ' Sunil Kumar', ' Shailender', ' Sumit Kumar', ' Rahul Rijhwani', ' darshan kumar', ' Chandresh', ' Rahul Chauhan', ' Narendra Kumar Agarwal', ' Vivek Anand Semwal', ' Vibhu', ' SANCHITA', ' kuldeep tomer', ' Parveen khanna', ' Pardeep N/A ', ' Vijay', ' Ravi Nambiar', ' Pick Your Prop Estates LLP', ' Veena', ' Amar Kumar Prasad', ' Jagadeesha', ' Meghana', ' Shashi', ' Satish Krishna', ' ARUN', ' subbareddy subbareddy', ' Mallesh', ' Karthik', ' Nanda', ' Mohammed Mansoor', ' Shriya Mankani', ' Nataraj D M', ' Narasimha murthy', ' Vijay Menon', ' Prakash', ' G S ANANTHA', ' Gowrish Bhaskar', ' Ramachandra Murthy', ' Thulaseedharan pillai A', ' Rathan', ' sheelavathi', ' Nayana A.S', ' Srikanth', ' MANJUNATHA', ' Chakko', nan] ['₹47.5 Lac ', '₹46 Lac ', '₹37 Lac ', '₹24 Lac ', '₹48 Lac ', '₹65.4 Lac ', '₹1 Cr ', '₹27 Lac ', '₹15 Lac ', '₹35 Lac ', '₹65 Lac ', '₹2.10 Cr ', '₹3.31 Cr ', '₹98 Lac ', '₹43.5 Lac ', '₹48.5 Lac ', '₹80 Lac ', '₹61.2 Lac ', '₹56 Lac ', '₹50.5 Lac ', '₹63 Lac ', '₹25.5 Lac ', '₹68.9 Lac ', '₹23 Lac ', '₹55 Lac ', '₹44 Lac ', '₹51 Lac ', '₹65 Lac ', '₹33 Lac ', '₹50 Lac ', '₹80 Lac ', '₹1.80 Cr ', '₹1.50 Cr ', '₹45 Lac ', '₹17 Lac ', '₹93 Lac ', '₹35 Lac ', '₹81.1 Lac ', '₹4.60 Cr ', '₹2.60 Cr ', '₹1.20 Cr ', '₹54 Lac ', '₹1.40 Cr ', '₹1.85 Cr ', '₹99 Lac ', '₹85 Lac ', '₹42 Lac ', '₹1 Cr ', '₹3.50 Cr ', '₹93 Lac ', '₹1.20 Cr ', '₹1.32 Cr ', '₹1.60 Cr ', '₹2.35 Cr ', '₹28 Lac ', '₹10.90 Cr ', '₹46 Lac ', '₹1.50 Cr ', '₹2.28 Cr ', '₹5 Cr ', '₹22 Lac ', '₹79 Lac ', '₹69.5 Lac ', '₹58 Lac ', '₹54.8 Lac ', '₹65 Lac ', '₹2 Cr ', '₹10.1 Lac ', '₹65 Lac ', '₹35 Lac ', '₹19 Lac ', '₹40 Lac ', '₹1.80 Cr ', '₹65 Lac ', '₹75 Lac ', '₹48.5 Lac ', '₹90 Lac ', '₹79 Lac ', '₹65 Lac ', '₹50 Lac ', '₹60 Lac ', '₹35 Lac ', '₹32 Lac ', '₹75 Lac ', '₹1.01 Cr ', '₹40 Lac ', '₹1.20 Cr ', '₹45 Lac ', '₹68.5 Lac ', '₹48 Lac ', '₹25 Lac ', '₹77 Lac ', '₹68 Lac ', '₹52 Lac ', '₹29.1 Lac ', '₹31.8 Lac ', '₹32.5 Lac ', '₹30 Lac ', '₹42 Lac ', '₹7.5 Lac ', '₹23 Lac ', '₹31.5 Lac ', '₹14 Lac ', '₹35 Lac ', '₹25 Lac ', '₹33 Lac ', '₹30 Lac ', '₹33.3 Lac ', '₹1.35 Cr ', '₹49 Lac ', '₹30 Lac ', '₹29.1 Lac ', '₹33 Lac ', '₹43.4 Lac ', '₹25 Lac ', '₹36 Lac ', '₹42.9 Lac ', '₹20 Lac ', '₹12.5 Lac ', '₹25 Lac ', '₹25.5 Lac ', '₹43.5 Lac ', '₹68 Lac ', '₹35 Lac ', '₹1.35 Cr ', '₹65 Lac ', '₹44.4 Lac ', '₹33.5 Lac ', '₹42 Lac ', '₹48 Lac ', '₹47 Lac ', '₹40.5 Lac ', '₹28 Lac ', '₹35 Lac ', '₹17.5 Lac ', '₹60 Lac ', '₹81.9 Lac ', '₹55 Lac ', '₹40 Lac ', '₹42 Lac ', '₹44 Lac ', '₹65 Lac ', '₹59.9 Lac ', '₹51 Lac ', '₹24 Lac ', '₹44 Lac ', '₹65 Lac ', '₹24 Lac ', '₹41.3 Lac ', '₹90 Lac ', '₹38 Lac ', '₹12 Lac ', '₹23 Lac ', '₹13 Lac ', '₹36 Lac ', '₹18 Lac ', '₹23 Lac ', '₹13.5 Lac ', '₹11 Lac ', '₹16.8 Lac ', '₹37 Lac ', '₹28 Lac ', '₹15 Lac ', '₹18.8 Lac ', '₹28 Lac ', '₹17.9 Lac ', '₹19 Lac ', '₹27 Lac ', '₹32.5 Lac ', '₹14.5 Lac ', '₹21 Lac ', '₹30 Lac ', '₹22 Lac ', '₹20 Lac ', '₹20 Lac ', '₹59 Lac ', '₹50 Lac ', '₹10 Lac ', '₹18 Lac ', '₹30 Lac ', '₹80 Lac ', '₹60 Lac ', '₹45 Lac ', '₹68 Lac ', '₹95 Lac ', '₹58 Lac ', '₹82 Lac ', '₹58 Lac ', '₹50 Lac ', '₹85 Lac ', '₹58 Lac ', '₹1.01 Cr ', '₹60 Lac ', '₹56 Lac ', '₹48 Lac ', '₹65.7 Lac ', '₹60 Lac ', '₹75 Lac ', '₹75 Lac ', '₹70 Lac ', '₹52 Lac ', '₹1.45 Cr ', '₹85 Lac ', '₹1.63 Cr ', '₹80 Lac ', '₹40 Lac ', '₹69 Lac ', '₹60 Lac ', '₹60 Lac ', '₹72 Lac '] ['₹3958', '₹3286', nan, '₹3719', '₹2474', '₹5363', nan, '₹4230', '₹4091', '₹4805', '₹3684', '₹6533', '₹6542', '₹9001', '₹3912', '₹4780', '₹4663', '₹5229', '₹5322', '₹5308', '₹4630', '₹5020', '₹2429', '₹4700', '₹2370', '₹3943', '₹3946', '₹4351', '₹2960', '₹3636', '₹8889', '₹10061', nan, '₹5294', '₹3864', '₹4439', '₹5000', '₹4894', '₹12224', '₹9630', '₹8000', '₹5143', '₹7179', '₹11889', '₹8135', '₹8213', '₹4421', '₹5731', '₹14170', '₹6503', '₹7500', '₹9429', '₹7111', '₹11190', '₹3733', '₹25952', '₹4182', '₹7692', '₹12473', nan, '₹3284', '₹6371', '₹3971', '₹4549', '₹3487', nan, '₹5882', nan, '₹4676', '₹3070', '₹2879', nan, '₹4615', '₹6667', '₹5396', '₹4533', '₹4813', '₹6371', '₹6500', '₹3788', '₹5911', '₹3507', '₹3200', '₹5639', '₹5913', '₹4525', '₹6366', '₹3750', nan, '₹3491', '₹2577', '₹4968', '₹3426', '₹6190', '₹2327', '₹3372', '₹3368', '₹3333', '₹4000', '₹1540', '₹2706', '₹3099', '₹2772', '₹3000', '₹3467', '₹2538', '₹2913', '₹1852', '₹7479', '₹3984', '₹3109', '₹2327', '₹3143', '₹3600', '₹2778', '₹3600', '₹2860', '₹2326', '₹2273', '₹2759', '₹2833', '₹2753', '₹5440', '₹3097', '₹4500000', '₹6190', '₹3589', '₹2481', '₹4000', '₹3000', '₹4695', '₹3400', '₹2511', '₹3167', '₹2500', '₹3582', '₹3997', '₹2555', '₹3200', '₹3359', '₹2627', '₹3611', '₹3328', '₹3045', '₹2264', '₹3745', '₹4561', '₹2592', '₹3944', '₹6000', '₹4077', '₹1846', '₹1941', '₹2167', '₹2917', '₹2118', '₹2244', '₹2077', '₹1746', '₹2585', '₹2587', '₹4242', '₹1840', '₹1500', '₹2393', '₹1611', '₹1900', '₹2348', '₹2790', '₹1503', '₹2211', '₹2400', '₹1913', '₹2367', '₹2667', '₹3711', '₹3285', '₹1818', '₹2857', '₹3797', '₹3509', '₹4110', '₹7556', '₹6611', nan, '₹4296', '₹5857', '₹4940', '₹4762', '₹6071', '₹5115', '₹4208', '₹6586', '₹4375', '₹3863', '₹4000', nan, nan, '₹4462', '₹5000', '₹5103', '₹6591', '₹5743', '₹14818', '₹4848', '₹6389', '₹5000', nan, '₹6667', '₹6457'] ['Floor17 out of 22', 'Floor7 out of 9', 'Floor8 out of 25', 'Floor1 out of 2', 'Floor8 out of 16', 'Floor9 out of 14', 'Floor18 out of 32', 'Floor14 out of 26', 'Floor1 out of 4', 'Floor25 out of 25', 'Floor16 out of 24', 'Floor11 out of 22', 'Floor6 out of 34', 'Floor3 out of 6', 'Floor5 out of 15', 'Floor11 out of 20', 'Floor6 out of 23', 'Floor19 out of 20', 'Floor12 out of 24', 'Floor26 out of 27', 'Floor17 out of 19', 'Floor1 out of 4', 'Floor10 out of 19', 'Floor1 out of 5', 'Floor20 out of 21', 'Floor6 out of 14', 'Floor4 out of 22', 'Floor7 out of 21', 'Express Park View 2', 'Floor25 out of 25', 'Floor22 out of 25', 'Floor3 out of 3', 'Floor10 out of 14', 'Floor3 out of 10', 'SocietyKendriya Vihar Sector 56', 'Floor1 out of 4', 'Floor7 out of 10', 'Floor19 out of 20', 'Floor9 out of 15', 'Floor3 out of 14', 'Monsoon Breeze Phase 2', 'Floor10 out of 27', 'Floor8 out of 22', 'Floor4 out of 4', 'Floor3 out of 4', 'Floor4 out of 9', 'Floor5 out of 6', 'Floor2 out of 3', 'Floor1 out of 4', 'Floor8 out of 10', 'FloorGround out of 4', 'Floor1 out of 18', 'Floor2 out of 14', 'Floor4 out of 9', 'Floor2 out of 3', 'City Plot Phase 1', 'Floor3 out of 4', 'Floor3 out of 4', 'Floor2 out of 4', 'FloorGround out of 32', 'Floor1 out of 3', 'Floor6 out of 12', 'Floor19 out of 47', 'Floor11 out of 14', 'Floor2 out of 2', 'Floor4 out of 5', 'Floor4 out of 8', 'Floor3 out of 4', 'Floor6 out of 14', 'Floor7 out of 25', 'Floor22 out of 23', 'Floor5 out of 5', 'Floor2 out of 4', 'Floor1 out of 4', 'Floor12 out of 14', 'Floor5 out of 6', 'Floor14 out of 23', 'Floor7 out of 13', 'Floor9 out of 9', 'Floor7 out of 19', 'Floor4 out of 5', 'Floor3 out of 4', 'Floor5 out of 5', 'Floor2 out of 5', 'Floor4 out of 5', 'Floor4 out of 4', 'Floor1 out of 16', 'Floor1 out of 4', 'Floor3 out of 5', 'Floor3 out of 5', 'Floor5 out of 5', 'Floor4 out of 4', 'Floor4 out of 4', 'Floor2 out of 5', 'Floor6 out of 6', 'Floor4 out of 6', 'Floor5 out of 5', 'Basement out of 4', 'Floor5 out of 8', 'Floor3 out of 10', 'FloorGround out of 3', 'Floor1 out of 4', 'Floor1 out of 3', 'Floor2 out of 6', 'Floor3 out of 8', 'Floor6 out of 6', 'Floor10 out of 10', 'Floor7 out of 10', 'Floor3 out of 6', 'Floor3 out of 5', 'Floor7 out of 11', 'Floor4 out of 7', 'Floor4 out of 8', 'Floor5 out of 11', 'FloorGround out of 4', 'Floor1 out of 1', 'Floor2 out of 10', 'Floor2 out of 4', 'Floor3 out of 6', 'Floor5 out of 10', 'Floor4 out of 6', 'Floor2 out of 3', 'Floor1 out of 6', 'Floor3 out of 8', 'Floor3 out of 4', 'Floor2 out of 6', 'Floor2 out of 5', 'Floor1 out of 2', 'Floor2 out of 5', 'Floor8 out of 9', 'Floor1 out of 10', 'Floor2 out of 2', 'Floor1 out of 2', 'Floor1 out of 3', 'Floor9 out of 11', 'FloorGround out of 3', 'Floor3 out of 5', 'Floor7 out of 10', 'Floor4 out of 14', 'Floor5 out of 9', 'Floor3 out of 13', 'Floor1 out of 11', 'Floor2 out of 11', 'Floor7 out of 15', 'Floor1 out of 10', 'Floor3 out of 4', 'Floor11 out of 14', 'Floor2 out of 2', 'Floor12 out of 12', 'Floor3 out of 9', 'Floor9 out of 9', 'Floor2 out of 11', 'Floor2 out of 6', 'Floor6 out of 13', 'Floor5 out of 6', 'Floor7 out of 10', 'Floor3 out of 13', 'Floor3 out of 13', 'Floor1 out of 9', 'Floor4 out of 14', 'Floor13 out of 15', 'Basement out of 15', 'Floor9 out of 16', 'Floor2 out of 14', 'Floor2 out of 4', 'Floor8 out of 13', 'Floor15 out of 15', 'Floor5 out of 7', 'Floor4 out of 9', 'Floor9 out of 14', 'Floor13 out of 14', 'Floor11 out of 16', 'Floor11 out of 15', 'SocietyAvalon Residency Phase 2', 'Floor12 out of 15', 'Floor9 out of 15', 'Floor9 out of 12', 'Floor7 out of 14', 'Floor14 out of 14', 'Floor4 out of 9', 'Floor2 out of 5', 'Floor9 out of 14', 'Floor2 out of 13', 'SocietyKrish City Phase 2', 'Floor2 out of 4', 'Floor6 out of 8', 'Floor3 out of 8', 'Floor2 out of 2', 'Floor1 out of 4', 'Floor4 out of 4', 'Floor4 out of 5', 'Floor8 out of 8', 'Floor2 out of 3', 'Floor2 out of 4', 'Floor3 out of 4', 'Basement out of 1', 'Floor1 out of 4', 'Floor3 out of 4', 'Floor1 out of 5', 'Floor2 out of 4', 'Floor3 out of 6', 'FloorGround out of 10', 'Floor3 out of 4', 'Floor7 out of 7', 'Floor4 out of 7', 'FloorGround out of 3', 'Floor1 out of 3', 'FloorGround out of 2', 'FloorGround out of 5', 'Floor1 out of 3', 'Floor3 out of 4', 'Floor3 out of 3', 'FloorGround out of 4'] 210 210 210 210 210 210 210 213
# inserting NaN's at index positions
Owner.append(np.nan)
# inserting NaN's at index positions
Carpet_Area.insert(8,np.nan)
Carpet_Area.insert(45,np.nan)
Carpet_Area.insert(67,np.nan)
Carpet_Area.insert(124,np.nan)
Carpet_Area.insert(130,np.nan)
Carpet_Area.insert(137,np.nan)
# inserting NaN's at index positions
per_sqt.insert(5,np.nan)
per_sqt.insert(2,np.nan)
per_sqt.insert(32,np.nan)
per_sqt.insert(59,np.nan)
per_sqt.insert(65,np.nan)
per_sqt.insert(67,np.nan)
per_sqt.insert(71,np.nan)
per_sqt.insert(88,np.nan)
per_sqt.insert(184,np.nan)
per_sqt.insert(196,np.nan)
per_sqt.insert(197,np.nan)
per_sqt.insert(207,np.nan)
# Creation the Dict with cloumns names as strings and values as column names in which values are present.
df3 = {'City':City,
'Area':Area,
'Owner':Owner,
'BHK':BHK,
'Floor':Floor[0:210],
'price':price,
'Carpet_Area':Carpet_Area,
'per_sqt':per_sqt
}
# creation of DataFrame
Project3=pd.DataFrame(df3)
Project3
| City | Area | Owner | BHK | Floor | price | Carpet_Area | per_sqt | |
|---|---|---|---|---|---|---|---|---|
| 0 | greater-noida | Zeta 1 | RAHUL TRIVEDI | 2 | Floor17 out of 22 | ₹47.5 Lac | 1200 | ₹3958 |
| 1 | greater-noida | Sector P4 | R Bhattacharjee | 3 | Floor7 out of 9 | ₹46 Lac | 1120 | ₹3286 |
| 2 | greater-noida | Eta 2 | Ashutosh | 3 | Floor8 out of 25 | ₹37 Lac | 995 | NaN |
| 3 | greater-noida | Sector 3 | Sandeep gautam | 2 | Floor1 out of 2 | ₹24 Lac | 970 | ₹3719 |
| 4 | greater-noida | Zeta 1 | vijay | 2 | Floor8 out of 16 | ₹48 Lac | 895 | ₹2474 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 205 | mysore | Nayana A.S | 2 | FloorGround out of 3 | ₹40 Lac | 1070 | ₹6389 | |
| 206 | mysore | Kuvempunagar | Srikanth | 2 | Floor1 out of 3 | ₹69 Lac | 1080 | ₹5000 |
| 207 | mysore | MANJUNATHA | 2 | FloorGround out of 2 | ₹60 Lac | 1200 | NaN | |
| 208 | mysore | V V Mohalla | Chakko | 2 | FloorGround out of 5 | ₹60 Lac | 900 | ₹6667 |
| 209 | mysore | Sriramapura | NaN | 2 | Floor1 out of 3 | ₹72 Lac | 1115 | ₹6457 |
210 rows × 8 columns
# conversion of DataFrame to csv
Project3.to_csv('DF3.csv')
# DataFrame concatenation into the final DF
FINALPROJECT = pd.concat([Project1,Project2,Project3])
# Resetting the index numbers after formaton of the finam DF
FINALPROJECT = FINALPROJECT.reset_index()
# Final uncleaned DataFrame
FINALPROJECT
| index | City | Area | Owner | BHK | Floor | price | Carpet_Area | per_sqt | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | bangalore | Kattigenahalli | Omkar Pandey | 3 | FloorGround out of 4 | ₹60 Lac | 1000 | ₹4950 |
| 1 | 1 | bangalore | Ayodaya Nagar | gokul | 2 | FloorGround out of 4 | ₹80 Lac | 1150 | ₹6957 |
| 2 | 2 | bangalore | Electronic City | Thayumanavan | 2 | Floor4 out of 4 | ₹48 Lac | 1060 | ₹4528 |
| 3 | 3 | bangalore | Tumkur Road | Prasanna | 3 | Floor14 out of 15 | ₹1.35 Cr | 1790 | ₹7542 |
| 4 | 4 | bangalore | Kudlu Gate | Akash Akash | 2 | Floor2 out of 4 | ₹61 Lac | 803 | ₹5706 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 415 | 205 | mysore | Nayana A.S | 2 | FloorGround out of 3 | ₹40 Lac | 1070 | ₹6389 | |
| 416 | 206 | mysore | Kuvempunagar | Srikanth | 2 | Floor1 out of 3 | ₹69 Lac | 1080 | ₹5000 |
| 417 | 207 | mysore | MANJUNATHA | 2 | FloorGround out of 2 | ₹60 Lac | 1200 | NaN | |
| 418 | 208 | mysore | V V Mohalla | Chakko | 2 | FloorGround out of 5 | ₹60 Lac | 900 | ₹6667 |
| 419 | 209 | mysore | Sriramapura | NaN | 2 | Floor1 out of 3 | ₹72 Lac | 1115 | ₹6457 |
420 rows × 9 columns
# final scraped data ---> uncleaned
FINALPROJECT.to_csv('Magicbrics_Project.csv')
Exploratory Data_Analysis
DataFrame Cleaning
# import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
plt.style.use('bmh')
import warnings
warnings.filterwarnings('ignore')
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'
# for HD visualizations
%config InlineBackend.figure_format='retina'
# load the Dataset
dff = pd.read_csv(r'C:\Users\GUDLA RAGUWING\Data Science Course\MagicBrics_Webscraping_project\Magicbrics_Project.csv')
dff.head(20)
| Unnamed: 0 | index | City | Area | Owner | BHK | Floor | price | Carpet_Area | per_sqt | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | bangalore | Kattigenahalli | Omkar Pandey | 3 | FloorGround out of 4 | ₹60 Lac | 1000.0 | ₹4950 |
| 1 | 1 | 1 | bangalore | Ayodaya Nagar | gokul | 2 | FloorGround out of 4 | ₹80 Lac | 1150.0 | ₹6957 |
| 2 | 2 | 2 | bangalore | Electronic City | Thayumanavan | 2 | Floor4 out of 4 | ₹48 Lac | 1060.0 | ₹4528 |
| 3 | 3 | 3 | bangalore | Tumkur Road | Prasanna | 3 | Floor14 out of 15 | ₹1.35 Cr | 1790.0 | ₹7542 |
| 4 | 4 | 4 | bangalore | Kudlu Gate | Akash Akash | 2 | Floor2 out of 4 | ₹61 Lac | 803.0 | ₹5706 |
| 5 | 5 | 5 | bangalore | Hal Stage 2 | av nath | 3 | Floor1 out of 4 | ₹1.30 Cr | 1640.0 | ₹7303 |
| 6 | 6 | 6 | bangalore | Murugeshpalya | Sangeeta Pillai | 2 | Floor3 out of 5 | ₹50 Lac | 1250.0 | ₹5000 |
| 7 | 7 | 7 | bangalore | Begur Road | ganga k | 3 | Basement out of 17 | ₹1.80 Cr | 1588.0 | ₹11335 |
| 8 | 8 | 8 | bangalore | Judicial Layout | Anjan K | 3 | Floor3 out of 14 | ₹1 Cr | 1000.0 | ₹8889 |
| 9 | 9 | 9 | bangalore | Bellandur | Omesh Saraf | 2 | Floor2 out of 10 | ₹1.02 Cr | 1465.0 | ₹6997 |
| 10 | 10 | 10 | bangalore | Kasturi Nagar | Mohd Hussain | 3 | FloorGround out of 7 | ₹1.45 Cr | 1500.0 | ₹8146 |
| 11 | 11 | 11 | bangalore | Koramangala Block 1 | Rahul Jain | 3 | Floor16 out of 16 | ₹3.85 Cr | 1418.0 | ₹20632 |
| 12 | 12 | 12 | bangalore | Sarjapur Road | Gaurav Kumar GAURAV | 3 | FloorGround out of 5 | ₹70 Lac | 1350.0 | ₹5655 |
| 13 | 13 | 13 | bangalore | Devanahalli | Ancy | 2 | Floor1 out of 7 | ₹85 Lac | 1000.0 | ₹6071 |
| 14 | 14 | 14 | bangalore | Hanumantha Nagar | Anoop | 2 | Floor2 out of 4 | ₹98 Lac | 1250.0 | ₹9800 |
| 15 | 15 | 15 | bangalore | geetha | 2 | Floor2 out of 5 | ₹60 Lac | 857.0 | ₹6367 | |
| 16 | 16 | 16 | bangalore | Doddakannelli | Omkar Omkar | 2 | Floor3 out of 6 | ₹78 Lac | 753.0 | ₹5158 |
| 17 | 17 | 17 | bangalore | Electronic City | ruchika | 2 | Floor1 out of 7 | ₹57 Lac | 1186.0 | ₹6374 |
| 18 | 18 | 18 | bangalore | Sarjapur Road | jeswanth | 3 | Floor1 out of 4 | ₹1.08 Cr | 1250.0 | ₹4553 |
| 19 | 19 | 19 | bangalore | Jigani | vikas saxena | 3 | Floor6 out of 12 | ₹57 Lac | 770.0 | ₹4545 |
# Droping the unwnated columns
dff.drop(['Unnamed: 0','index'], axis = 1, inplace = True)
dff.head()
| City | Area | Owner | BHK | Floor | price | Carpet_Area | per_sqt | |
|---|---|---|---|---|---|---|---|---|
| 0 | bangalore | Kattigenahalli | Omkar Pandey | 3 | FloorGround out of 4 | ₹60 Lac | 1000.00 | ₹4950 |
| 1 | bangalore | Ayodaya Nagar | gokul | 2 | FloorGround out of 4 | ₹80 Lac | 1150.00 | ₹6957 |
| 2 | bangalore | Electronic City | Thayumanavan | 2 | Floor4 out of 4 | ₹48 Lac | 1060.00 | ₹4528 |
| 3 | bangalore | Tumkur Road | Prasanna | 3 | Floor14 out of 15 | ₹1.35 Cr | 1790.00 | ₹7542 |
| 4 | bangalore | Kudlu Gate | Akash Akash | 2 | Floor2 out of 4 | ₹61 Lac | 803.00 | ₹5706 |
dff.duplicated()
dff.duplicated().value_counts()
0 False
1 False
2 False
3 False
4 False
...
415 False
416 False
417 False
418 False
419 False
Length: 420, dtype: bool
False 420 dtype: int64
dff.isna()
| City | Area | Owner | BHK | Floor | price | Carpet_Area | per_sqt | |
|---|---|---|---|---|---|---|---|---|
| 0 | False | False | False | False | False | False | False | False |
| 1 | False | False | False | False | False | False | False | False |
| 2 | False | False | False | False | False | False | False | False |
| 3 | False | False | False | False | False | False | False | False |
| 4 | False | False | False | False | False | False | False | False |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 415 | False | False | False | False | False | False | False | False |
| 416 | False | False | False | False | False | False | False | False |
| 417 | False | False | False | False | False | False | False | True |
| 418 | False | False | False | False | False | False | False | False |
| 419 | False | False | True | False | False | False | False | False |
420 rows × 8 columns
dff.isna().sum()
City 0 Area 0 Owner 1 BHK 0 Floor 3 price 0 Carpet_Area 8 per_sqt 29 dtype: int64
dff[dff.per_sqt.isna()]
| City | Area | Owner | BHK | Floor | price | Carpet_Area | per_sqt | |
|---|---|---|---|---|---|---|---|---|
| 32 | mumbai | Borivali West | Sachin Kunder | 2 | Floor5 out of 15 | ₹2.25 Cr | 1160.00 | NaN |
| 39 | mumbai | Mahim West | Satyajit Satyajit | 2 | Floor6 out of 12 | ₹3.75 Cr | 800.00 | NaN |
| 43 | mumbai | Haroon mansuri | 2 | Floor21 out of 21 | ₹60 Lac | 1870.00 | NaN | |
| 60 | chennai | Mannivakkam Chennai | Yogesh Baskaran | 2 | FloorGround out of 2 | ₹55 Lac | 800.00 | NaN |
| 67 | chennai | Mogappair Chennai | Elango Rajendran | 2 | Floor16 out of 16 | ₹53 Lac | 852.00 | NaN |
| 115 | visakhapatnam | Tagarapuvalsa | Vision Properties | 3 | Floor3 out of 5 | ₹38.9 Lac | 800.00 | NaN |
| 116 | visakhapatnam | Parawada | Honey Group | 2 | Floor5 out of 5 | ₹26 Lac | 900.00 | NaN |
| 117 | visakhapatnam | PM Palem | New Living Properties | 2 | Floor3 out of 5 | ₹42 Lac | 650.00 | NaN |
| 118 | visakhapatnam | Madhurwada | podilapu simhachalam | 2 | NaN | ₹38 Lac | 800.00 | NaN |
| 119 | visakhapatnam | MVP Colony | Dinakar | 2 | NaN | ₹78 Lac | NaN | NaN |
| 131 | ranchi | Hesag | rajesh singh | 3 | Floor4 out of 6 | ₹77 Lac | 1212.00 | NaN |
| 145 | ranchi | taha | 5 | FloorGround out of 8 | ₹1.05 Cr | 1650.00 | NaN | |
| 173 | haridwar | Jwalapur | Abhishek chaudhary | 2 | Floor3 out of 3 | ₹37 Lac | 900.00 | NaN |
| 191 | vadodara | Nani Bapod | Umakant parmar | 1 | Floor1 out of 3 | ₹7.5 Lac | 1010.00 | NaN |
| 192 | vadodara | Race Course circle | Akshat Dani | 2 | Floor3 out of 5 | ₹45 Lac | 850.00 | NaN |
| 199 | vadodara | Bhayli | Shashank | 2 | Floor4 out of 4 | ₹35 Lac | 1150.00 | NaN |
| 204 | vadodara | Sayajipura | Ankur Javia | 2 | Floor5 out of 5 | ₹19 Lac | 1580.00 | NaN |
| 212 | greater-noida | Eta 2 | Ashutosh | 3 | Floor8 out of 25 | ₹37 Lac | 995.00 | NaN |
| 216 | greater-noida | Greater Noida West | Kamal Bansal | 4 | Floor18 out of 32 | ₹1 Cr | 2364.00 | NaN |
| 242 | gurgaon | Kendriya Vihar | Arun Yadav | 3 | Floor10 out of 14 | ₹1.50 Cr | 1500.00 | NaN |
| 269 | gurgaon | Sector 65 | Manjunath S | 3 | FloorGround out of 32 | ₹5 Cr | 3069.00 | NaN |
| 275 | mangalore | Kulai | Devdhar Shetty | 2 | Floor4 out of 5 | ₹65 Lac | 906.00 | NaN |
| 277 | mangalore | Ujire | Stany | Floor3 out of 4 | ₹10.1 Lac | NaN | NaN | |
| 281 | mangalore | Shakti Nagar | Iqbal | 2 | Floor5 out of 5 | ₹40 Lac | 1166.00 | NaN |
| 298 | mangalore | Nandigudda | SNEHA | 3 | Floor3 out of 5 | ₹68.5 Lac | 1341.00 | NaN |
| 394 | mysore | Yadavgiri | Jagadeesha | 2 | Floor2 out of 4 | ₹95 Lac | 1280.00 | NaN |
| 406 | mysore | Vidyarayanapuram | Narasimha murthy | 3 | Floor1 out of 4 | ₹60 Lac | 1553.00 | NaN |
| 407 | mysore | Srirangapatnam | Vijay Menon | 3 | Floor3 out of 4 | ₹75 Lac | 1875.00 | NaN |
| 417 | mysore | MANJUNATHA | 2 | FloorGround out of 2 | ₹60 Lac | 1200.00 | NaN |
dff[dff.Carpet_Area.isna()]
| City | Area | Owner | BHK | Floor | price | Carpet_Area | per_sqt | |
|---|---|---|---|---|---|---|---|---|
| 119 | visakhapatnam | MVP Colony | Dinakar | 2 | NaN | ₹78 Lac | NaN | NaN |
| 209 | vadodara | Jiten chokshi | 4 | NaN | ₹1.20 Cr | NaN | ₹4615 | |
| 218 | greater-noida | Yamuna Expressway | Harkesh Sharma | 1 | Floor1 out of 4 | ₹15 Lac | NaN | ₹4091 |
| 255 | gurgaon | New Colony | Rahul | 3 | Floor4 out of 9 | ₹85 Lac | NaN | ₹8213 |
| 277 | mangalore | Ujire | Stany | Floor3 out of 4 | ₹10.1 Lac | NaN | NaN | |
| 334 | agra | Dayal Bagh | Shikha | 5 | Floor3 out of 4 | ₹1.35 Cr | NaN | ₹4500000 |
| 340 | agra | Dayal Bagh | Akshit Rajoriya | 3 | Floor1 out of 10 | ₹47 Lac | NaN | ₹4695 |
| 347 | agra | Fatehabad Road | PRAVEEN KUMAR | 3 | Floor7 out of 10 | ₹55 Lac | NaN | ₹2555 |
dff[dff.Floor.isna()]
| City | Area | Owner | BHK | Floor | price | Carpet_Area | per_sqt | |
|---|---|---|---|---|---|---|---|---|
| 118 | visakhapatnam | Madhurwada | podilapu simhachalam | 2 | NaN | ₹38 Lac | 800.00 | NaN |
| 119 | visakhapatnam | MVP Colony | Dinakar | 2 | NaN | ₹78 Lac | NaN | NaN |
| 209 | vadodara | Jiten chokshi | 4 | NaN | ₹1.20 Cr | NaN | ₹4615 |
dff[dff.Owner.isna()]
| City | Area | Owner | BHK | Floor | price | Carpet_Area | per_sqt | |
|---|---|---|---|---|---|---|---|---|
| 419 | mysore | Sriramapura | NaN | 2 | Floor1 out of 3 | ₹72 Lac | 1115.00 | ₹6457 |
dff.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 420 entries, 0 to 419 Data columns (total 8 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 City 420 non-null object 1 Area 420 non-null object 2 Owner 419 non-null object 3 BHK 420 non-null object 4 Floor 417 non-null object 5 price 420 non-null object 6 Carpet_Area 412 non-null float64 7 per_sqt 391 non-null object dtypes: float64(1), object(7) memory usage: 26.4+ KB
dff[['BHK','price','Carpet_Area','per_sqt']].iloc[40:50]
| BHK | price | Carpet_Area | per_sqt | |
|---|---|---|---|---|
| 40 | 2 | ₹2.70 Cr | 450.00 | ₹11529 |
| 41 | 1 | ₹1.02 Cr | 430.00 | ₹38459 |
| 42 | 1 | ₹95 Lac | 675.00 | ₹7242 |
| 43 | 2 | ₹60 Lac | 1870.00 | NaN |
| 44 | 4 | ₹15 Cr | 820.00 | ₹10000 |
| 45 | 2 | ₹32 Lac | 850.00 | ₹14583 |
| 46 | 2 | ₹98 Lac | 1172.00 | ₹5376 |
| 47 | 3 | ₹4.89 Cr | 404.00 | ₹12042 |
| 48 | 1 | ₹44.9 Lac | 575.00 | ₹13669 |
| 49 | 2 | ₹70 Lac | 507.00 | ₹19375 |
dff.per_sqt=dff.per_sqt.str.replace('₹', '')
dff.price= dff.price.str.replace('₹','')
dff.price = dff.price.str.replace('Cr','00000')
dff.price = dff.price.str.replace('Lac','00000')
dff.price=dff.price.str.replace('.','')
dff.price=dff.price.str.replace(' ','')
dff.Floor=dff.Floor.str.replace('Floor','')
dff.BHK=dff.BHK.str.replace(' ','3')
dff.rename(columns = {'price':'price_in_crores', 'per_sqt':'per_sqt_rupees'}, inplace = True)
dff.price_in_crores=dff.price_in_crores.astype(int)
dff.head(50)
| City | Area | Owner | BHK | Floor | price_in_crores | Carpet_Area | per_sqt_rupees | |
|---|---|---|---|---|---|---|---|---|
| 0 | bangalore | Kattigenahalli | Omkar Pandey | 3 | Ground out of 4 | 6000000 | 1000.00 | 4950 |
| 1 | bangalore | Ayodaya Nagar | gokul | 2 | Ground out of 4 | 8000000 | 1150.00 | 6957 |
| 2 | bangalore | Electronic City | Thayumanavan | 2 | 4 out of 4 | 4800000 | 1060.00 | 4528 |
| 3 | bangalore | Tumkur Road | Prasanna | 3 | 14 out of 15 | 13500000 | 1790.00 | 7542 |
| 4 | bangalore | Kudlu Gate | Akash Akash | 2 | 2 out of 4 | 6100000 | 803.00 | 5706 |
| 5 | bangalore | Hal Stage 2 | av nath | 3 | 1 out of 4 | 13000000 | 1640.00 | 7303 |
| 6 | bangalore | Murugeshpalya | Sangeeta Pillai | 2 | 3 out of 5 | 5000000 | 1250.00 | 5000 |
| 7 | bangalore | Begur Road | ganga k | 3 | Basement out of 17 | 18000000 | 1588.00 | 11335 |
| 8 | bangalore | Judicial Layout | Anjan K | 3 | 3 out of 14 | 100000 | 1000.00 | 8889 |
| 9 | bangalore | Bellandur | Omesh Saraf | 2 | 2 out of 10 | 10200000 | 1465.00 | 6997 |
| 10 | bangalore | Kasturi Nagar | Mohd Hussain | 3 | Ground out of 7 | 14500000 | 1500.00 | 8146 |
| 11 | bangalore | Koramangala Block 1 | Rahul Jain | 3 | 16 out of 16 | 38500000 | 1418.00 | 20632 |
| 12 | bangalore | Sarjapur Road | Gaurav Kumar GAURAV | 3 | Ground out of 5 | 7000000 | 1350.00 | 5655 |
| 13 | bangalore | Devanahalli | Ancy | 2 | 1 out of 7 | 8500000 | 1000.00 | 6071 |
| 14 | bangalore | Hanumantha Nagar | Anoop | 2 | 2 out of 4 | 9800000 | 1250.00 | 9800 |
| 15 | bangalore | geetha | 2 | 2 out of 5 | 6000000 | 857.00 | 6367 | |
| 16 | bangalore | Doddakannelli | Omkar Omkar | 2 | 3 out of 6 | 7800000 | 753.00 | 5158 |
| 17 | bangalore | Electronic City | ruchika | 2 | 1 out of 7 | 5700000 | 1186.00 | 6374 |
| 18 | bangalore | Sarjapur Road | jeswanth | 3 | 1 out of 4 | 10800000 | 1250.00 | 4553 |
| 19 | bangalore | Jigani | vikas saxena | 3 | 6 out of 12 | 5700000 | 770.00 | 4545 |
| 20 | bangalore | Ranju | 2 | 9 out of 9 | 3500000 | 2150.00 | 7500 | |
| 21 | bangalore | tezal | 4 | 1 out of 4 | 18000000 | 1900.00 | 12500 | |
| 22 | bangalore | Jaya Nagar Block 3 | shravan | 3 | 2 out of 4 | 27500000 | 1460.00 | 7705 |
| 23 | bangalore | whitefield | Bhav | 3 | 5 out of 5 | 11200000 | 1885.00 | 5305 |
| 24 | bangalore | whitefield | Nagananda | 3 | 2 out of 4 | 100000 | 1950.00 | 7436 |
| 25 | bangalore | Harlur | MD Fuzail | 3 | 1 out of 3 | 14500000 | 1250.00 | 5333 |
| 26 | bangalore | Sarjapur Road | ashwanth | 3 | 13 out of 27 | 8000000 | 1820.00 | 7418 |
| 27 | bangalore | Hoodi | Partha Sarma | 3 | 5 out of 13 | 13500000 | 2060.00 | 10194 |
| 28 | bangalore | Haralur Ma | Shekar | 3 | 3 out of 3 | 21000000 | 740.00 | 4650 |
| 29 | bangalore | Yelahanka | Sashank Constructions | 2 | Ground out of 4 | 51200000 | 331.00 | 26471 |
| 30 | mumbai | Daulat Nagar | K SH | 1 | 8 out of 9 | 7300000 | 310.00 | 23276 |
| 31 | mumbai | Samel Pada | NIYATI | 1 | 1 out of 5 | 2200000 | 610.00 | 16667 |
| 32 | mumbai | Borivali West | Sachin Kunder | 2 | 5 out of 15 | 22500000 | 1160.00 | NaN |
| 33 | mumbai | Goregaon West | Motashaw Motashaw | 3 | 4 out of 7 | 27000000 | 665.00 | 23636 |
| 34 | mumbai | Vasai East | MONCY BHASKAR | 1 | Ground out of 76 | 4100000 | 1052.00 | 37500 |
| 35 | mumbai | Worli | Anam khan | 3 | 4 out of 4 | 59900000 | 685.00 | 27136 |
| 36 | mumbai | Kurla East | Milind Desai | 2 | Ground out of 3 | 12000000 | 400.00 | 18545 |
| 37 | mumbai | Andheri East | Jas Baljeet J | 1 | 1 out of 6 | 13000000 | 332.00 | 60000 |
| 38 | mumbai | Kandivali East | Gangaram Dhuri | 1 | 5 out of 20 | 6500000 | 776.00 | 5047 |
| 39 | mumbai | Mahim West | Satyajit Satyajit | 2 | 6 out of 12 | 37500000 | 800.00 | NaN |
| 40 | mumbai | Borivali West | vasant ahir | 2 | 7 out of 7 | 27000000 | 450.00 | 11529 |
| 41 | mumbai | Chembur West | Rajendra | 1 | Ground out of 4 | 10200000 | 430.00 | 38459 |
| 42 | mumbai | Aditi Shah | 1 | 1 out of 3 | 9500000 | 675.00 | 7242 | |
| 43 | mumbai | Haroon mansuri | 2 | 21 out of 21 | 6000000 | 1870.00 | NaN | |
| 44 | mumbai | Dadar West | kishor kishor | 4 | 3 out of 3 | 1500000 | 820.00 | 10000 |
| 45 | mumbai | Boisar West | Medha Naik | 2 | 7 out of 21 | 3200000 | 850.00 | 14583 |
| 46 | mumbai | Mira Road | Anita | 2 | 5 out of 7 | 9800000 | 1172.00 | 5376 |
| 47 | mumbai | K | jagdish Dassani | 3 | 5 out of 7 | 48900000 | 404.00 | 12042 |
| 48 | mumbai | Virar West | Raghav Sharma | 1 | 4 out of 15 | 44900000 | 575.00 | 13669 |
| 49 | mumbai | Vasai West | nandan lanjekar | 2 | 9 out of 15 | 7000000 | 507.00 | 19375 |
np.round(dff.isna().sum()/len(dff)*100, 2).astype(str) + '%'
City 0.0% Area 0.0% Owner 0.24% BHK 0.0% Floor 0.71% price_in_crores 0.0% Carpet_Area 1.9% per_sqt_rupees 6.9% dtype: object
% is more than 50% so am going for filling the missing values with mean|median|mode based on the column type()dff.BHK.isna().sum()
0
dff.BHK = dff.BHK.astype(str)
sns.boxplot(data=dff.BHK);
dff.Area.iloc[[47,88,102,120,122,144,196,205,280,289]]
# Replace these values with NaN's and then fill them with mode of the column
droplist1=[47,88,102,120,122,144,196,205,280,289]
dff.Area= dff.Area.drop(droplist1,axis=0)
dff.Area.iloc[102]
47 K 88 Gu 102 Poth 120 H 122 S 144 H 196 Bill 205 Bill 280 K 289 Jepp Name: Area, dtype: object
nan
dff = dff.replace(r'^\s*$', np.nan, regex=True)
print(dff.head(20))
City Area Owner BHK \
0 bangalore Kattigenahalli Omkar Pandey 3
1 bangalore Ayodaya Nagar gokul 2
2 bangalore Electronic City Thayumanavan 2
3 bangalore Tumkur Road Prasanna 3
4 bangalore Kudlu Gate Akash Akash 2
5 bangalore Hal Stage 2 av nath 3
6 bangalore Murugeshpalya Sangeeta Pillai 2
7 bangalore Begur Road ganga k 3
8 bangalore Judicial Layout Anjan K 3
9 bangalore Bellandur Omesh Saraf 2
10 bangalore Kasturi Nagar Mohd Hussain 3
11 bangalore Koramangala Block 1 Rahul Jain 3
12 bangalore Sarjapur Road Gaurav Kumar GAURAV 3
13 bangalore Devanahalli Ancy 2
14 bangalore Hanumantha Nagar Anoop 2
15 bangalore NaN geetha 2
16 bangalore Doddakannelli Omkar Omkar 2
17 bangalore Electronic City ruchika 2
18 bangalore Sarjapur Road jeswanth 3
19 bangalore Jigani vikas saxena 3
Floor price_in_crores Carpet_Area per_sqt_rupees
0 Ground out of 4 6000000 1000.00 4950
1 Ground out of 4 8000000 1150.00 6957
2 4 out of 4 4800000 1060.00 4528
3 14 out of 15 13500000 1790.00 7542
4 2 out of 4 6100000 803.00 5706
5 1 out of 4 13000000 1640.00 7303
6 3 out of 5 5000000 1250.00 5000
7 Basement out of 17 18000000 1588.00 11335
8 3 out of 14 100000 1000.00 8889
9 2 out of 10 10200000 1465.00 6997
10 Ground out of 7 14500000 1500.00 8146
11 16 out of 16 38500000 1418.00 20632
12 Ground out of 5 7000000 1350.00 5655
13 1 out of 7 8500000 1000.00 6071
14 2 out of 4 9800000 1250.00 9800
15 2 out of 5 6000000 857.00 6367
16 3 out of 6 7800000 753.00 5158
17 1 out of 7 5700000 1186.00 6374
18 1 out of 4 10800000 1250.00 4553
19 6 out of 12 5700000 770.00 4545
# filling Area column with mode of the column
dff.Area.value_counts()
dff.Area.mode()
dff.Area.mode().values
Ar_mode=dff.Area.mode().values[0]
dff.Area.fillna(Ar_mode, inplace=True)
dff.Area.isna().sum()
Alwar Bypass Road 22
Greater Noida West 7
Jwalapur 6
Sikandra 5
Dayal Bagh 5
..
Sector 2 Masibari 1
Upper Chutia 1
Hazaribag Road 1
Lalpur 1
Sriramapura 1
Name: Area, Length: 261, dtype: int64
0 Alwar Bypass Road Name: Area, dtype: object
array([' Alwar Bypass Road '], dtype=object)
0
# filling Owner column with mode of the column
dff.Owner.value_counts()
dff.Owner.mode()
dff.Owner.mode().values
Ow_mode=dff.Owner.mode().values[0]
dff.Owner.fillna(Ow_mode, inplace=True)
dff.Owner.isna().sum()
Prasanna 2
Sunil Kumar 2
vijay 2
Srikanth 2
sanjeev 2
..
Mukesh Mukesh 1
Manit Kumar 1
Neeraj 1
Abhishek 1
Chakko 1
Name: Owner, Length: 414, dtype: int64
0 Prasanna 1 Srikanth 2 Sunil Kumar 3 sanjeev 4 vijay Name: Owner, dtype: object
array([' Prasanna', ' Srikanth', ' Sunil Kumar', ' sanjeev', ' vijay'],
dtype=object)
0
dff[dff.Owner.isna()]
| City | Area | Owner | BHK | Floor | price_in_crores | Carpet_Area | per_sqt_rupees |
|---|
# filling Floor column with mode of the column
dff.Floor.value_counts()
dff.Floor.mode()
dff.Floor.mode().values
FL_mode=dff.Floor.mode().values[0]
dff.Floor.fillna(FL_mode, inplace=True)
dff.Floor.isna().sum()
3 out of 4 24
1 out of 4 22
2 out of 4 20
4 out of 4 16
2 out of 5 14
..
17 out of 19 1
26 out of 27 1
12 out of 24 1
Ground out of 76 1
Ground out of 10 1
Name: Floor, Length: 145, dtype: int64
0 3 out of 4 Name: Floor, dtype: object
array(['3 out of 4'], dtype=object)
0
dff[dff.Floor.isna()]
| City | Area | Owner | BHK | Floor | price_in_crores | Carpet_Area | per_sqt_rupees |
|---|
dff.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 420 entries, 0 to 419 Data columns (total 8 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 City 420 non-null object 1 Area 420 non-null object 2 Owner 420 non-null object 3 BHK 420 non-null object 4 Floor 420 non-null object 5 price_in_crores 420 non-null int32 6 Carpet_Area 412 non-null float64 7 per_sqt_rupees 391 non-null object dtypes: float64(1), int32(1), object(6) memory usage: 24.7+ KB
dff.per_sqt_rupees.skew()
# As you can observe my per_sqt skewness in extremely high that represents that huge outliers are present.
# Or intensional put there.
19.756402504713776
# creating the droplist
droplist=[30,31,49,34,37,41,51,334]
# droping the droplist
dff.per_sqt_rupees= dff.per_sqt_rupees.drop(droplist,axis=0)
dff.per_sqt_rupees.iloc[334]
nan
dff.per_sqt_rupees=dff.per_sqt_rupees.astype(float)
# calculatin the median using pandas and filling using fillna()
per_median=dff.per_sqt_rupees.median()
dff.per_sqt_rupees.fillna(per_median, inplace=True)
# type casting
dff.per_sqt_rupees=dff.per_sqt_rupees.astype(int)
dff[dff.per_sqt_rupees.isna()]
| City | Area | Owner | BHK | Floor | price_in_crores | Carpet_Area | per_sqt_rupees |
|---|
dff.per_sqt_rupees.skew()
# As you can see that the skewness is significantly reduced
3.503912518035357
import scipy.stats as stats
Q1, Q2, Q3 = tuple(dff.per_sqt_rupees.quantile(q = [0.25,0.5,0.75]).values)
print(Q1, Q2, Q3)
IQR = Q3-Q1
UL = Q3 + 1.5*IQR
LL = Q1-1.5*IQR
print(IQR, UL, LL)
3295.0 4274.0 5481.25 2186.25 8760.625 15.625
dff[(dff.per_sqt_rupees > UL) | (dff.per_sqt_rupees < LL)]
| City | Area | Owner | BHK | Floor | price_in_crores | Carpet_Area | per_sqt_rupees | |
|---|---|---|---|---|---|---|---|---|
| 7 | bangalore | Begur Road | ganga k | 3 | Basement out of 17 | 18000000 | 1588.00 | 11335 |
| 8 | bangalore | Judicial Layout | Anjan K | 3 | 3 out of 14 | 100000 | 1000.00 | 8889 |
| 11 | bangalore | Koramangala Block 1 | Rahul Jain | 3 | 16 out of 16 | 38500000 | 1418.00 | 20632 |
| 14 | bangalore | Hanumantha Nagar | Anoop | 2 | 2 out of 4 | 9800000 | 1250.00 | 9800 |
| 21 | bangalore | Alwar Bypass Road | tezal | 4 | 1 out of 4 | 18000000 | 1900.00 | 12500 |
| 27 | bangalore | Hoodi | Partha Sarma | 3 | 5 out of 13 | 13500000 | 2060.00 | 10194 |
| 29 | bangalore | Yelahanka | Sashank Constructions | 2 | Ground out of 4 | 51200000 | 331.00 | 26471 |
| 33 | mumbai | Goregaon West | Motashaw Motashaw | 3 | 4 out of 7 | 27000000 | 665.00 | 23636 |
| 35 | mumbai | Worli | Anam khan | 3 | 4 out of 4 | 59900000 | 685.00 | 27136 |
| 36 | mumbai | Kurla East | Milind Desai | 2 | Ground out of 3 | 12000000 | 400.00 | 18545 |
| 40 | mumbai | Borivali West | vasant ahir | 2 | 7 out of 7 | 27000000 | 450.00 | 11529 |
| 44 | mumbai | Dadar West | kishor kishor | 4 | 3 out of 3 | 1500000 | 820.00 | 10000 |
| 45 | mumbai | Boisar West | Medha Naik | 2 | 7 out of 21 | 3200000 | 850.00 | 14583 |
| 47 | mumbai | Alwar Bypass Road | jagdish Dassani | 3 | 5 out of 7 | 48900000 | 404.00 | 12042 |
| 48 | mumbai | Virar West | Raghav Sharma | 1 | 4 out of 15 | 44900000 | 575.00 | 13669 |
| 50 | mumbai | Bhandup West | amit chalke | 1 | 17 out of 18 | 10500000 | 558.00 | 21333 |
| 52 | mumbai | Goregaon East | Ritu | 2 | 7 out of 7 | 12500000 | 1130.00 | 22951 |
| 53 | mumbai | Marol Maroshi Road | Khuzema Tajir | 3 | 15 out of 23 | 19000000 | 562.00 | 10695 |
| 54 | mumbai | Malad West | Dipti solanki | 2 | 3 out of 4 | 15500000 | 535.00 | 25316 |
| 59 | mumbai | Mulund West | Savith Raghavan Savith Raghavan | 2 | 1 out of 2 | 16000000 | 1044.00 | 8947 |
| 69 | chennai | Iyyappanthangal Chennai | vinothan | 2 | 1 out of 2 | 12500000 | 700.00 | 9140 |
| 73 | chennai | Velachery Chennai | Kads | 2 | 2 out of 4 | 8500000 | 464.00 | 11616 |
| 82 | chennai | AGS Colony Velachery Chennai | R Premalatha | 1 | 1 out of 2 | 3500000 | 1194.00 | 11818 |
| 223 | greater-noida | Zeta 1 | Shiv Kumar | 4 | 3 out of 6 | 9800000 | 2200.00 | 9001 |
| 240 | gurgaon | Jal Vayu Vihar | Mohit | 2 | 22 out of 25 | 8000000 | 900.00 | 8889 |
| 241 | gurgaon | Sohna Sector 32 | Nithin Abraham | 3 | 3 out of 3 | 18000000 | 1789.00 | 10061 |
| 248 | gurgaon | Sector 112 | Y S Dwivedi | 3 | 9 out of 15 | 46000000 | 3763.00 | 12224 |
| 249 | gurgaon | Sector 102 | Rahul Arora | 4 | 3 out of 14 | 26000000 | 1800.00 | 9630 |
| 253 | gurgaon | DLF City Phase 1 | sanjay kaushik | 4 | 4 out of 4 | 18500000 | 1556.00 | 11889 |
| 258 | gurgaon | Sector 47 | Pradeep Sharma | 3 | 1 out of 4 | 35000000 | 2470.00 | 14170 |
| 261 | gurgaon | DLF City Phase 1 | Jk Batra | 2 | 1 out of 18 | 13200000 | 1250.00 | 9429 |
| 263 | gurgaon | South City 2 | Rishabh Arora | 4 | 4 out of 9 | 23500000 | 1800.00 | 11190 |
| 265 | gurgaon | Sector 54 | VIVEK SHARMA | 3 | City Plot Phase 1 | 109000000 | 3650.00 | 25952 |
| 268 | gurgaon | Sector 65 | Ankush | 3 | 2 out of 4 | 22800000 | 1500.00 | 12473 |
| 413 | mysore | Hebbal 2nd Stage | Rathan | 2 | 7 out of 7 | 16300000 | 1100.00 | 14818 |
# Suppress Scientific Notation in Numpy
np.set_printoptions(suppress=True)
# Suppress Scientific Notation in Pandas
pd.set_option('display.float_format', '{:.2f}'.format)
zscore_array = stats.zscore(dff.per_sqt_rupees)
np.where(zscore_array>2,1,0).sum()
np.where(zscore_array<-2,1,0).sum()
15
0
dff[zscore_array>2]
| City | Area | Owner | BHK | Floor | price_in_crores | Carpet_Area | per_sqt_rupees | |
|---|---|---|---|---|---|---|---|---|
| 11 | bangalore | Koramangala Block 1 | Rahul Jain | 3 | 16 out of 16 | 38500000 | 1418.00 | 20632 |
| 21 | bangalore | Alwar Bypass Road | tezal | 4 | 1 out of 4 | 18000000 | 1900.00 | 12500 |
| 29 | bangalore | Yelahanka | Sashank Constructions | 2 | Ground out of 4 | 51200000 | 331.00 | 26471 |
| 33 | mumbai | Goregaon West | Motashaw Motashaw | 3 | 4 out of 7 | 27000000 | 665.00 | 23636 |
| 35 | mumbai | Worli | Anam khan | 3 | 4 out of 4 | 59900000 | 685.00 | 27136 |
| 36 | mumbai | Kurla East | Milind Desai | 2 | Ground out of 3 | 12000000 | 400.00 | 18545 |
| 45 | mumbai | Boisar West | Medha Naik | 2 | 7 out of 21 | 3200000 | 850.00 | 14583 |
| 48 | mumbai | Virar West | Raghav Sharma | 1 | 4 out of 15 | 44900000 | 575.00 | 13669 |
| 50 | mumbai | Bhandup West | amit chalke | 1 | 17 out of 18 | 10500000 | 558.00 | 21333 |
| 52 | mumbai | Goregaon East | Ritu | 2 | 7 out of 7 | 12500000 | 1130.00 | 22951 |
| 54 | mumbai | Malad West | Dipti solanki | 2 | 3 out of 4 | 15500000 | 535.00 | 25316 |
| 258 | gurgaon | Sector 47 | Pradeep Sharma | 3 | 1 out of 4 | 35000000 | 2470.00 | 14170 |
| 265 | gurgaon | Sector 54 | VIVEK SHARMA | 3 | City Plot Phase 1 | 109000000 | 3650.00 | 25952 |
| 268 | gurgaon | Sector 65 | Ankush | 3 | 2 out of 4 | 22800000 | 1500.00 | 12473 |
| 413 | mysore | Hebbal 2nd Stage | Rathan | 2 | 7 out of 7 | 16300000 | 1100.00 | 14818 |
# Extreme values/Outliers cause variance
dff.per_sqt_rupees.var()
dff.per_sqt_rupees.std()
12627149.27491759
3553.4700329280377
from sklearn.preprocessing import StandardScaler, MinMaxScaler
# Normalization
mm1 = MinMaxScaler()
dff1 = mm1.fit_transform(dff.per_sqt_rupees.values.reshape(-1,1))
dff1.min()
dff1.max()
0.0
1.0
sns.histplot(dff1, kde = True);
sns.boxplot(dff1);
# Standardization
ss1 = StandardScaler()
dff2 = ss1.fit_transform(dff.per_sqt_rupees.values.reshape(-1,1))
round(dff2.mean())
round(dff2.std())
0
1
sns.histplot(dff2, kde = True);
sns.boxplot(dff2);
np.log(dff.per_sqt_rupees)
0 8.51
1 8.85
2 8.42
3 8.93
4 8.65
...
415 8.76
416 8.52
417 8.36
418 8.80
419 8.77
Name: per_sqt_rupees, Length: 420, dtype: float64
sns.histplot(np.log(dff.per_sqt_rupees), kde = True);
sns.boxplot(np.log(dff.per_sqt_rupees));
# Log transformation seems to be fitting well for this data
# compare the variance
dff.per_sqt_rupees.var()
np.log(dff.per_sqt_rupees).var()
12627149.27491759
0.23814150733330514
dff.per_sqt_rupees**(1/3)
0 17.04
1 19.09
2 16.54
3 19.61
4 17.87
...
415 18.56
416 17.10
417 16.23
418 18.82
419 18.62
Name: per_sqt_rupees, Length: 420, dtype: float64
sns.boxplot(dff.per_sqt_rupees**(1/3));
stats.boxcox(dff.per_sqt_rupees)
len(stats.boxcox(dff.per_sqt_rupees))
type(stats.boxcox(dff.per_sqt_rupees))
x,y = stats.boxcox(dff.per_sqt_rupees)
(array([2.11195827, 2.1180167 , 2.11020754, 2.11931871, 2.11460509,
2.11880526, 2.11215123, 2.12519303, 2.12182256, 2.11811077,
2.12051637, 2.13204143, 2.11444302, 2.11570617, 2.12322127,
2.11653027, 2.11274285, 2.11654908, 2.11031783, 2.11028263,
2.11923022, 2.12644576, 2.11965543, 2.11326993, 2.11909397,
2.11336791, 2.11905534, 2.12376861, 2.11073751, 2.13437529,
2.109034 , 2.109034 , 2.109034 , 2.13334783, 2.109034 ,
2.13459321, 2.13095717, 2.109034 , 2.11233005, 2.109034 ,
2.12541444, 2.109034 , 2.11867025, 2.109034 , 2.12350305,
2.12830742, 2.11351691, 2.12597449, 2.12754181, 2.109034 ,
2.13237023, 2.109034 , 2.13307212, 2.12442144, 2.13397708,
2.1131393 , 2.1214341 , 2.11215123, 2.10983033, 2.12191778,
2.109034 , 2.1117427 , 2.11582094, 2.1131393 , 2.11462402,
2.12092258, 2.12099264, 2.109034 , 2.11247659, 2.12222823,
2.11582379, 2.12010539, 2.10932499, 2.12551196, 2.10796302,
2.10764248, 2.11940649, 2.11323123, 2.11683285, 2.11606681,
2.10936271, 2.11706628, 2.12573432, 2.11307169, 2.11754896,
2.12023905, 2.10295901, 2.11268783, 2.11058757, 2.10764779,
2.11080974, 2.10832227, 2.10810265, 2.11951005, 2.10547099,
2.10147698, 2.11923022, 2.11215123, 2.10110878, 2.10576335,
2.10625589, 2.10794222, 2.10343587, 2.11302164, 2.10826132,
2.10301671, 2.11565705, 2.11279764, 2.10874669, 2.11118171,
2.10596111, 2.09751594, 2.11080974, 2.10878109, 2.11832878,
2.109034 , 2.109034 , 2.109034 , 2.109034 , 2.109034 ,
2.11075879, 2.10867271, 2.11333999, 2.11392701, 2.10962855,
2.10614456, 2.10639534, 2.11022082, 2.10923975, 2.10615631,
2.10585356, 2.109034 , 2.10025353, 2.10757324, 2.11385679,
2.11292096, 2.11333999, 2.09859975, 2.11064774, 2.10822054,
2.10785347, 2.109034 , 2.11412221, 2.10438151, 2.10677224,
2.109034 , 2.11001124, 2.1127867 , 2.11136186, 2.12086111,
2.09671989, 2.11021197, 2.10028757, 2.11315703, 2.09290595,
2.11369819, 2.10220465, 2.10078182, 2.10461086, 2.10240779,
2.09756577, 2.10744993, 2.10557518, 2.10472105, 2.10747145,
2.11022082, 2.10199894, 2.10680603, 2.09990929, 2.09213461,
2.10913486, 2.10209069, 2.10287208, 2.109034 , 2.10588352,
2.10576335, 2.09445579, 2.10605607, 2.10668174, 2.10055745,
2.10072394, 2.11336791, 2.09588694, 2.10696828, 2.10062424,
2.10327378, 2.09103735, 2.10086417, 2.10846348, 2.09159365,
2.09692741, 2.109034 , 2.109034 , 2.10251956, 2.10585956,
2.10335856, 2.08525251, 2.09702001, 2.09956679, 2.109034 ,
2.09267388, 2.10664198, 2.11412221, 2.10277006, 2.109034 ,
2.09670945, 2.10764779, 2.10110878, 2.09695833, 2.11058757,
2.10742299, 2.10327378, 2.109034 , 2.10607381, 2.09621186,
2.11347205, 2.109034 , 2.10882029, 2.10812323, 2.11138218,
2.10586555, 2.11696829, 2.11699158, 2.12200562, 2.10717273,
2.11128027, 2.11079278, 2.11300014, 2.11332951, 2.11328047,
2.11065203, 2.11222762, 2.09572229, 2.11094886, 2.09505992,
2.10734185, 2.10735811, 2.10940033, 2.10078182, 2.10557518,
2.12182256, 2.12358736, 2.109034 , 2.11323123, 2.10690695,
2.10980755, 2.11215123, 2.11173874, 2.12616486, 2.12297505,
2.12023905, 2.11268783, 2.11852906, 2.12581117, 2.12049573,
2.12064121, 2.10972522, 2.11468377, 2.12797043, 2.11689035,
2.11923022, 2.12267558, 2.11837462, 2.1250239 , 2.10615631,
2.13419956, 2.10858342, 2.11962895, 2.12641869, 2.109034 ,
2.10325961, 2.11654102, 2.10749294, 2.11030024, 2.10463686,
2.109034 , 2.11514851, 2.109034 , 2.11084783, 2.10166587,
2.10009949, 2.109034 , 2.11058757, 2.11731016, 2.11358562,
2.11022967, 2.11141462, 2.11654102, 2.11688252, 2.10647609,
2.11523577, 2.10476619, 2.10265266, 2.11439173, 2.11524176,
2.11019424, 2.11652758, 2.10625589, 2.109034 , 2.10466281,
2.09728488, 2.11202806, 2.10423559, 2.11604441, 2.09456176,
2.10387154, 2.10384424, 2.10360326, 2.10764779, 2.08212959,
2.09854336, 2.10189124, 2.09915379, 2.10110878, 2.10450644,
2.0968861 , 2.10038928, 2.08798098, 2.1191857 , 2.10756256,
2.10196824, 2.09456176, 2.10222735, 2.10535369, 2.09920822,
2.10535369, 2.09993535, 2.09455002, 2.09391674, 2.09903524,
2.09969934, 2.09898026, 2.11373549, 2.1018758 , 2.109034 ,
2.11604441, 2.10528537, 2.09628684, 2.10764779, 2.10110878,
2.11092788, 2.10406137, 2.09660472, 2.10240779, 2.09648881,
2.10524173, 2.10763185, 2.09706102, 2.10265266, 2.10378262,
2.09778339, 2.10542171, 2.10356854, 2.10146906, 2.09380705,
2.10622667, 2.11035294, 2.09743591, 2.10734727, 2.1154997 ,
2.1080511 , 2.08788233, 2.08939143, 2.09258287, 2.10042305,
2.0919335 , 2.093561 , 2.09137302, 2.08616579, 2.09736559,
2.09738571, 2.1088789 , 2.08778321, 2.08125326, 2.09532097,
2.0836058 , 2.0887537 , 2.09480672, 2.09931658, 2.08132016,
2.09314791, 2.0953997 , 2.08895807, 2.09502559, 2.09817231,
2.10602646, 2.1032667 , 2.08741567, 2.09990929, 2.10652777,
2.10477907, 2.10822054, 2.11934805, 2.11716853, 2.109034 ,
2.10913964, 2.11507278, 2.11191934, 2.11120642, 2.11570617,
2.11258449, 2.10871222, 2.11710473, 2.10951258, 2.10690136,
2.10764779, 2.109034 , 2.109034 , 2.10991203, 2.11215123,
2.11253995, 2.11711752, 2.11472136, 2.12849299, 2.11155565,
2.11658928, 2.11215123, 2.109034 , 2.11731016, 2.1167698 ]),
-0.4643823843215321)
2
tuple
sns.boxplot(x);
drop_list = [239,259]
dff.Carpet_Area=dff.Carpet_Area.drop(drop_list, axis=0)
Car_median=dff.Carpet_Area.median()
dff.Carpet_Area.fillna(Car_median, inplace=True)
dff.Carpet_Area.isna().sum()
0
dff[dff.Carpet_Area.isna()]
| City | Area | Owner | BHK | Floor | price_in_crores | Carpet_Area | per_sqt_rupees |
|---|
Q1, Q2, Q3 = tuple(dff.Carpet_Area.quantile(q = [0.25,0.5,0.75]).values)
print(Q1, Q2, Q3)
812.0 1052.5 1400.0
IQR = Q3-Q1
UL = Q3 + 1.5*IQR
LL = Q1-1.5*IQR
print(IQR, UL, LL)
588.0 2282.0 -70.0
dff[(dff.Carpet_Area > UL) | (dff.Carpet_Area < LL)]
| City | Area | Owner | BHK | Floor | price_in_crores | Carpet_Area | per_sqt_rupees | |
|---|---|---|---|---|---|---|---|---|
| 154 | haridwar | Laksar Road | Shakti Verma | 1 | 1 out of 4 | 14300000 | 3000.00 | 2192 |
| 200 | vadodara | Danteshwar | Pushpita Roy Choudhury | 2 | 4 out of 9 | 2500000 | 2358.00 | 2174 |
| 202 | vadodara | Gotri Sevasi Road | Deepak Singh | 3 | 9 out of 9 | 5000000 | 2350.00 | 5556 |
| 208 | vadodara | Alwar Bypass Road | Bindi shah | 3 | 2 out of 2 | 4200000 | 2600.00 | 2545 |
| 216 | greater-noida | Greater Noida West | Kamal Bansal | 4 | 18 out of 32 | 100000 | 2364.00 | 4274 |
| 221 | greater-noida | Chi 5 | AN SINHA | 4 | 11 out of 22 | 21000000 | 3210.00 | 6533 |
| 222 | greater-noida | Jaypee Greens | Nihit | 4 | 6 out of 34 | 33100000 | 3441.00 | 6542 |
| 248 | gurgaon | Sector 112 | Y S Dwivedi | 3 | 9 out of 15 | 46000000 | 3763.00 | 12224 |
| 258 | gurgaon | Sector 47 | Pradeep Sharma | 3 | 1 out of 4 | 35000000 | 2470.00 | 14170 |
| 265 | gurgaon | Sector 54 | VIVEK SHARMA | 3 | City Plot Phase 1 | 109000000 | 3650.00 | 25952 |
| 269 | gurgaon | Sector 65 | Manjunath S | 3 | Ground out of 32 | 500000 | 3069.00 | 4274 |
| 276 | mangalore | Bajpe | Gopal | 4 | 4 out of 8 | 200000 | 3030.00 | 5882 |
| 282 | mangalore | Falnir | Santosh Babu Salian | 3 | 2 out of 4 | 18000000 | 3900.00 | 4615 |
| 346 | agra | Sikandra | SHWETANG SHARMA | 5 | 3 out of 5 | 81900000 | 2410.00 | 3997 |
# Suppress Scientific Notation in Numpy
np.set_printoptions(suppress=True)
# Suppress Scientific Notation in Pandas
pd.set_option('display.float_format', '{:.2f}'.format)
zscore_array = stats.zscore(dff.Carpet_Area)
np.where(zscore_array>2,1,0).sum()
np.where(zscore_array<-2,1,0).sum()
15
2
zscore_array>2
0 False
1 False
2 False
3 False
4 False
...
415 False
416 False
417 False
418 False
419 False
Name: Carpet_Area, Length: 420, dtype: bool
dff[zscore_array>2]
| City | Area | Owner | BHK | Floor | price_in_crores | Carpet_Area | per_sqt_rupees | |
|---|---|---|---|---|---|---|---|---|
| 142 | ranchi | Alwar Bypass Road | Avishek Modi Modi | 4 | 3 out of 4 | 12500000 | 2250.00 | 5556 |
| 154 | haridwar | Laksar Road | Shakti Verma | 1 | 1 out of 4 | 14300000 | 3000.00 | 2192 |
| 200 | vadodara | Danteshwar | Pushpita Roy Choudhury | 2 | 4 out of 9 | 2500000 | 2358.00 | 2174 |
| 202 | vadodara | Gotri Sevasi Road | Deepak Singh | 3 | 9 out of 9 | 5000000 | 2350.00 | 5556 |
| 208 | vadodara | Alwar Bypass Road | Bindi shah | 3 | 2 out of 2 | 4200000 | 2600.00 | 2545 |
| 216 | greater-noida | Greater Noida West | Kamal Bansal | 4 | 18 out of 32 | 100000 | 2364.00 | 4274 |
| 221 | greater-noida | Chi 5 | AN SINHA | 4 | 11 out of 22 | 21000000 | 3210.00 | 6533 |
| 222 | greater-noida | Jaypee Greens | Nihit | 4 | 6 out of 34 | 33100000 | 3441.00 | 6542 |
| 248 | gurgaon | Sector 112 | Y S Dwivedi | 3 | 9 out of 15 | 46000000 | 3763.00 | 12224 |
| 258 | gurgaon | Sector 47 | Pradeep Sharma | 3 | 1 out of 4 | 35000000 | 2470.00 | 14170 |
| 265 | gurgaon | Sector 54 | VIVEK SHARMA | 3 | City Plot Phase 1 | 109000000 | 3650.00 | 25952 |
| 269 | gurgaon | Sector 65 | Manjunath S | 3 | Ground out of 32 | 500000 | 3069.00 | 4274 |
| 276 | mangalore | Bajpe | Gopal | 4 | 4 out of 8 | 200000 | 3030.00 | 5882 |
| 282 | mangalore | Falnir | Santosh Babu Salian | 3 | 2 out of 4 | 18000000 | 3900.00 | 4615 |
| 346 | agra | Sikandra | SHWETANG SHARMA | 5 | 3 out of 5 | 81900000 | 2410.00 | 3997 |
dff.Carpet_Area.skew()
1.672304347952084
sns.boxplot(data=dff.Carpet_Area);
# Extreme values/Outliers cause variance
dff.Carpet_Area.var()
dff.Carpet_Area.std()
281486.8008523696
530.5532969008576
mm = MinMaxScaler()
dff_ = mm.fit_transform(dff.Carpet_Area.values.reshape(-1,1))
dff_.min()
dff_.max()
0.0
1.0
sns.histplot(dff_, kde = True);
sns.boxplot(dff_);
# Standardization
ss = StandardScaler()
dff_s = ss.fit_transform(dff.Carpet_Area.values.reshape(-1,1))
round(dff_s.mean())
dff_s.std()
sns.histplot(dff_s, kde = True);
sns.boxplot(dff_s);
# log transformation method
np.log(dff.Carpet_Area)
0 6.91
1 7.05
2 6.97
3 7.49
4 6.69
...
415 6.98
416 6.98
417 7.09
418 6.80
419 7.02
Name: Carpet_Area, Length: 420, dtype: float64
sns.histplot(np.log(dff.Carpet_Area), kde = True);
sns.boxplot(np.log(dff.Carpet_Area));
# cube root transformation method
dff.Carpet_Area**(1/3)
0 10.00
1 10.48
2 10.20
3 12.14
4 9.29
...
415 10.23
416 10.26
417 10.63
418 9.65
419 10.37
Name: Carpet_Area, Length: 420, dtype: float64
sns.boxplot(dff.Carpet_Area**(1/3));
(dff.Carpet_Area**(1/3)).skew()
dff.Carpet_Area.skew()
-0.04396947259226398
1.672304347952084
import scipy.stats as stats
stats.boxcox(dff.Carpet_Area)
len(stats.boxcox(dff.Carpet_Area))
type(stats.boxcox(dff.Carpet_Area))
x,y = stats.boxcox(dff.Carpet_Area)
(array([36.60342465, 38.83608626, 37.51921819, 46.7795446 , 33.33918254,
45.09564664, 40.22831393, 44.49029921, 36.60342465, 43.00915977,
43.43801727, 42.42343321, 41.55489717, 36.60342465, 40.22831393,
34.27800037, 32.43533305, 39.34539935, 40.22831393, 32.7465937 ,
50.50075042, 47.96099206, 42.94739202, 47.80235806, 48.48438448,
40.22831393, 47.10600059, 49.60803085, 32.19443999, 22.70129238,
22.0533256 , 29.63094782, 38.97851499, 30.75218253, 37.39895004,
31.14598915, 24.6706847 , 22.73152132, 32.85546022, 33.28592469,
25.97259129, 25.46284768, 30.94996522, 47.64296116, 33.63873449,
34.15835321, 39.14845575, 24.77833042, 28.88535848, 27.35439424,
28.51332272, 31.04819443, 38.54897227, 28.6014687 , 27.99893593,
29.20815607, 33.14331244, 35.57311056, 30.086475 , 37.27812921,
33.28592469, 39.54094628, 41.55489717, 35.81461641, 36.61895551,
35.00036151, 33.28592469, 34.19259854, 43.36493058, 31.4368166 ,
30.12740941, 42.13253116, 35.49208998, 26.3213334 , 28.9935158 ,
27.40113425, 36.4475984 , 25.84645722, 26.44434466, 32.89163623,
34.15835321, 28.1118766 , 39.45730979, 34.96723052, 31.4368166 ,
29.42061407, 32.60063978, 37.1415367 , 33.28592469, 40.89929792,
33.69120681, 45.77991497, 44.33710093, 31.4368166 , 43.92081655,
38.83608626, 41.03161834, 36.60342465, 35.2474408 , 40.89929792,
29.42061407, 36.60342465, 40.89929792, 43.63205749, 46.88872727,
38.11249792, 44.86420952, 44.63104332, 36.13307467, 38.59223361,
35.41080635, 47.69617883, 37.36879691, 45.09564664, 33.98639365,
33.28592469, 35.00036151, 30.45211719, 33.28592469, 37.40648292,
38.25877797, 43.43801727, 38.83608626, 28.84193681, 44.15938336,
49.10188439, 35.49208998, 40.63280415, 40.22831393, 43.43801727,
42.00515007, 39.70746606, 36.60342465, 33.28592469, 48.22371281,
40.9787635 , 43.92081655, 39.26116752, 36.14889322, 35.81461641,
38.31706749, 37.41401364, 51.46662099, 35.81461641, 38.83608626,
45.21072735, 41.39890662, 46.44977306, 36.10140804, 43.43801727,
43.31610776, 32.37996627, 38.11249792, 30.45211719, 57.99999124,
39.67978201, 31.79985456, 42.82347282, 43.24272515, 37.74325448,
42.04342245, 36.98892473, 34.83425981, 37.36879691, 27.18992309,
27.88535708, 30.45211719, 31.4368166 , 30.45211719, 44.63104332,
33.28592469, 34.44790722, 42.82347282, 35.00036151, 46.88872727,
29.50499932, 35.2474408 , 45.77991497, 32.37996627, 49.32551329,
43.43801727, 35.00036151, 32.92775622, 39.65207036, 31.4368166 ,
32.47217039, 43.61996625, 47.96099206, 43.00915977, 37.8175141 ,
25.97259129, 36.75831424, 34.15835321, 36.85080459, 36.98892473,
33.28592469, 36.29082027, 33.28592469, 30.24974754, 38.83608626,
52.48111879, 35.00036151, 52.40694376, 32.37996627, 44.39611324,
18.12845607, 29.42061407, 45.21072735, 54.65683745, 37.40648292,
39.54094628, 38.40426565, 36.52562955, 36.13307467, 34.91745052,
46.61507887, 52.53665056, 30.6526184 , 37.40648292, 35.81461641,
36.52562955, 59.64802508, 61.38742663, 50.98699463, 35.16535421,
33.98639365, 43.80083235, 34.36310234, 37.444115 , 29.73533555,
38.90739356, 35.81461641, 33.98639365, 34.49864868, 42.13253116,
38.33162015, 40.69966161, 43.36493058, 38.33162015, 37.40648292,
35.00036151, 46.76860612, 43.43801727, 32.37996627, 22.0533256 ,
46.3390968 , 28.77663264, 45.29103387, 63.6983569 , 46.88872727,
40.89929792, 35.00036151, 48.48438448, 44.11181816, 39.7765559 ,
37.40648292, 29.63094782, 46.28361566, 53.50401875, 37.40648292,
42.82347282, 40.22831393, 44.04033194, 46.88872727, 28.33588099,
62.90146809, 35.97434553, 43.43801727, 43.43801727, 58.5489484 ,
30.85129634, 38.11249792, 45.77991497, 40.56578906, 41.29444382,
35.09948895, 58.23959371, 37.40648292, 42.06890968, 40.36378797,
30.6526184 , 39.06361718, 64.64538183, 36.2120694 , 42.06890968,
37.66878634, 47.64296116, 37.21750943, 36.60342465, 41.16333119,
32.98183167, 11.24291181, 36.60342465, 41.29444382, 41.63257905,
34.73405816, 44.78279979, 38.11249792, 41.43798308, 39.54094628,
36.13307467, 39.54094628, 48.84596197, 33.98639365, 40.22831393,
35.70220197, 33.26814532, 35.00036151, 37.36879691, 26.88075378,
31.4368166 , 33.55102281, 27.30754293, 34.43096987, 31.83773104,
39.54094628, 37.06534237, 43.43801727, 46.9431815 , 39.95539237,
36.05383406, 40.22831393, 41.3858694 , 35.97434553, 39.54094628,
36.60342465, 43.43801727, 34.32909736, 28.33588099, 35.09948895,
35.00036151, 43.92081655, 40.22831393, 34.15835321, 37.40648292,
37.36879691, 34.12405926, 41.55489717, 37.36879691, 42.19601531,
37.40648292, 39.42937466, 38.33162015, 36.60342465, 30.6526184 ,
45.49660166, 52.95959843, 37.40648292, 34.36310234, 40.22831393,
42.19601531, 46.88872727, 46.3390968 , 45.49660166, 35.97434553,
35.65387035, 42.51140332, 35.42708423, 34.3461058 , 43.43801727,
39.04945184, 28.44696293, 36.29082027, 26.37063334, 40.01018955,
32.37996627, 32.92775622, 30.45211719, 30.04546242, 29.42061407,
42.57407973, 30.6526184 , 33.55102281, 40.22831393, 39.12020538,
38.27336221, 36.60342465, 38.83608626, 33.79580655, 31.4368166 ,
35.81461641, 40.22831393, 38.83608626, 29.3994648 , 32.37996627,
44.51380098, 43.70450415, 27.18992309, 30.04546242, 33.10752382,
45.21072735, 43.27943877, 38.03905714, 3.50779096, 40.63280415,
24.6706847 , 42.19601531, 39.17667707, 30.45211719, 37.36879691,
34.83425981, 48.999747 , 35.18179324, 40.63280415, 33.28592469,
38.03905714, 44.07609587, 47.69617883, 45.56482811, 42.19601531,
36.8969263 , 50.98699463, 41.94125154, 38.11249792, 45.21072735,
37.66878634, 37.8175141 , 39.54094628, 35.00036151, 38.33162015]),
0.3971088052992405)
2
tuple
sns.boxplot(x);
dff.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 420 entries, 0 to 419 Data columns (total 8 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 City 420 non-null object 1 Area 420 non-null object 2 Owner 420 non-null object 3 BHK 420 non-null object 4 Floor 420 non-null object 5 price_in_crores 420 non-null int32 6 Carpet_Area 420 non-null float64 7 per_sqt_rupees 420 non-null int32 dtypes: float64(1), int32(2), object(5) memory usage: 23.1+ KB
# finding the skweness of the column
dff.price_in_crores.skew()
2.7504799201818098
dff.price_in_crores=dff.price_in_crores.astype(int)
Q1, Q2, Q3 = tuple(dff.price_in_crores.quantile(q = [0.25,0.5,0.75]).values)
print(Q1, Q2, Q3)
IQR = Q3-Q1
UL = Q3 + 1.5*IQR
LL = Q1-1.5*IQR
print(IQR, UL, LL)
3775000.0 6000000.0 12000000.0 8225000.0 24337500.0 -8562500.0
dff[(dff.price_in_crores > UL) | (dff.price_in_crores < LL)]
| City | Area | Owner | BHK | Floor | price_in_crores | Carpet_Area | per_sqt_rupees | |
|---|---|---|---|---|---|---|---|---|
| 11 | bangalore | Koramangala Block 1 | Rahul Jain | 3 | 16 out of 16 | 38500000 | 1418.00 | 20632 |
| 22 | bangalore | Jaya Nagar Block 3 | shravan | 3 | 2 out of 4 | 27500000 | 1460.00 | 7705 |
| 29 | bangalore | Yelahanka | Sashank Constructions | 2 | Ground out of 4 | 51200000 | 331.00 | 26471 |
| 33 | mumbai | Goregaon West | Motashaw Motashaw | 3 | 4 out of 7 | 27000000 | 665.00 | 23636 |
| 35 | mumbai | Worli | Anam khan | 3 | 4 out of 4 | 59900000 | 685.00 | 27136 |
| 39 | mumbai | Mahim West | Satyajit Satyajit | 2 | 6 out of 12 | 37500000 | 800.00 | 4274 |
| 40 | mumbai | Borivali West | vasant ahir | 2 | 7 out of 7 | 27000000 | 450.00 | 11529 |
| 47 | mumbai | Alwar Bypass Road | jagdish Dassani | 3 | 5 out of 7 | 48900000 | 404.00 | 12042 |
| 48 | mumbai | Virar West | Raghav Sharma | 1 | 4 out of 15 | 44900000 | 575.00 | 13669 |
| 57 | mumbai | Andheri East | DHRUV SHARMA | 2 | 1 out of 11 | 25200000 | 935.00 | 5000 |
| 79 | chennai | Chennai | sakthi | 2 | 1 out of 3 | 29700000 | 778.00 | 6198 |
| 89 | chennai | Chennai | PURUSHOTHAMAN | 2 | 10 out of 16 | 74900000 | 1300.00 | 4000 |
| 90 | visakhapatnam | Alwar Bypass Road | Hari | 3 | 15 out of 16 | 47500000 | 823.00 | 4667 |
| 95 | visakhapatnam | Madhurwada | Lakshman | 3 | 1 out of 5 | 63600000 | 1150.00 | 3046 |
| 100 | visakhapatnam | Alwar Bypass Road | Ravindra Pamidi | 3 | 2 out of 5 | 97500000 | 600.00 | 3750 |
| 110 | visakhapatnam | Kurmannapalem | Jaswanth | 2 | 2 out of 3 | 43600000 | 925.00 | 3700 |
| 115 | visakhapatnam | Tagarapuvalsa | Vision Properties | 3 | 3 out of 5 | 38900000 | 800.00 | 4274 |
| 121 | ranchi | Alwar Bypass Road | Anuj Krchandra | 3 | 5 out of 6 | 76400000 | 1500.00 | 4200 |
| 123 | ranchi | Ratu Road | Abhishek Gupta | 1 | 2 out of 4 | 31500000 | 573.00 | 5497 |
| 124 | ranchi | Harihar S | Shree Ram Developers | 3 | 8 out of 12 | 68600000 | 1560.00 | 4400 |
| 158 | haridwar | Patanjali | tarun agarwal | 2 | 6 out of 6 | 50500000 | 1484.00 | 3483 |
| 210 | greater-noida | Zeta 1 | RAHUL TRIVEDI | 2 | 17 out of 22 | 47500000 | 1200.00 | 3958 |
| 215 | greater-noida | Omicron | PRASAD | 3 | 9 out of 14 | 65400000 | 1775.00 | 5363 |
| 222 | greater-noida | Jaypee Greens | Nihit | 4 | 6 out of 34 | 33100000 | 3441.00 | 6542 |
| 224 | greater-noida | Greater Noida West | Right Value Solutions | 2 | 5 out of 15 | 43500000 | 910.00 | 3912 |
| 225 | greater-noida | Omicron 1 | Adhunik Propmart | 2 | 11 out of 20 | 48500000 | 840.00 | 4780 |
| 227 | greater-noida | Sector 1 Greater Noida West | Investors Lab | 2 | 19 out of 20 | 61200000 | 862.00 | 5229 |
| 229 | greater-noida | Greater Noida West | piyush nair | 2 | 26 out of 27 | 50500000 | 615.00 | 5308 |
| 231 | greater-noida | Alwar Bypass Road | Bricksnwall Innovations Pvt.Ltd. | 2 | 1 out of 4 | 25500000 | 950.00 | 5020 |
| 232 | greater-noida | Noida Extention | Pragati | 3 | 10 out of 19 | 68900000 | 840.00 | 2429 |
| 247 | gurgaon | Sector 78 | Pushpendra Sethi | 3 | 19 out of 20 | 81100000 | 1657.00 | 4894 |
| 248 | gurgaon | Sector 112 | Y S Dwivedi | 3 | 9 out of 15 | 46000000 | 3763.00 | 12224 |
| 249 | gurgaon | Sector 102 | Rahul Arora | 4 | 3 out of 14 | 26000000 | 1800.00 | 9630 |
| 258 | gurgaon | Sector 47 | Pradeep Sharma | 3 | 1 out of 4 | 35000000 | 2470.00 | 14170 |
| 265 | gurgaon | Sector 54 | VIVEK SHARMA | 3 | City Plot Phase 1 | 109000000 | 3650.00 | 25952 |
| 272 | mangalore | Bunts Hostel Road | Jeanson Veigas | 3 | 19 out of 47 | 69500000 | 1700.00 | 3971 |
| 274 | mangalore | Kulshekar | Naveen Kamath | 3 | 2 out of 2 | 54800000 | 1330.00 | 3487 |
| 285 | mangalore | Padil | jaysen | 3 | 5 out of 6 | 48500000 | 1070.00 | 4533 |
| 298 | mangalore | Nandigudda | SNEHA | 3 | 3 out of 5 | 68500000 | 1341.00 | 4274 |
| 304 | raipur | Santoshi Nagar | Deekaha | 3 | 6 out of 6 | 29100000 | 1250.00 | 2327 |
| 305 | raipur | Naya Raipur | Divya Singh | 2 | 4 out of 6 | 31800000 | 943.00 | 3372 |
| 306 | raipur | Hirapur Road | Arpit Jain | 3 | 5 out of 5 | 32500000 | 799.00 | 3368 |
| 311 | raipur | Bhatagaon | vijay | 2 | 1 out of 4 | 31500000 | 815.00 | 3099 |
| 317 | raipur | Deopuri | sandip Agarwal | 3 | 7 out of 10 | 33300000 | 1500.00 | 1852 |
| 321 | raipur | Santoshi Nagar | Mitchell | 3 | 4 out of 7 | 29100000 | 1250.00 | 2327 |
| 323 | raipur | Shankar Nagar | achyutananda | 3 | 5 out of 11 | 43400000 | 960.00 | 3600 |
| 326 | raipur | Kachna Road | Sumit Wadhwani | 3 | 2 out of 10 | 42900000 | 1500.00 | 2860 |
| 330 | agra | Sikandra | vishal sharma | 2 | 4 out of 6 | 25500000 | 900.00 | 2833 |
| 331 | agra | Vibhav Nagar | Arnav Singh | 3 | 2 out of 3 | 43500000 | 1540.00 | 2753 |
| 336 | agra | Shastri Puram | hemant sharma | 2 | 2 out of 5 | 44400000 | 848.00 | 3589 |
| 337 | agra | Agra Shamshabad Raja Kherah Marg | Prem lal Taneja | 3 | 1 out of 2 | 33500000 | 1350.00 | 2481 |
| 341 | agra | Fatehabad Road | Sanjeev Singh | 2 | 2 out of 2 | 40500000 | 1192.00 | 3400 |
| 346 | agra | Sikandra | SHWETANG SHARMA | 5 | 3 out of 5 | 81900000 | 2410.00 | 3997 |
| 352 | agra | Shahganj | Sanchit gupta | 2 | 2 out of 11 | 59900000 | 1750.00 | 3328 |
| 358 | agra | Shastri Puram | Dr Archika Gupta Dr Archika | 2 | 12 out of 12 | 41300000 | 861.00 | 3944 |
| 378 | bhiwadi | Alwar Bypass Road | Rahul Rijhwani | 2 | 4 out of 9 | 32500000 | 829.00 | 2790 |
| 405 | mysore | Siddhartha Layout | Nataraj D M | 2 | Basement out of 1 | 65700000 | 1095.00 | 4000 |
# log transformation method
np.log(dff.price_in_crores)
0 15.61
1 15.89
2 15.38
3 16.42
4 15.62
...
415 15.20
416 15.75
417 15.61
418 15.61
419 15.79
Name: price_in_crores, Length: 420, dtype: float64
sns.boxplot(np.log(dff.price_in_crores));
# cube root transformation method
dff.price_in_crores**(1/3)
0 181.71
1 200.00
2 168.69
3 238.11
4 182.72
...
415 158.74
416 190.38
417 181.71
418 181.71
419 193.10
Name: price_in_crores, Length: 420, dtype: float64
sns.boxplot(dff.price_in_crores**(1/3));
# Box-Cox tranformation method
stats.boxcox(dff.price_in_crores)
len(stats.boxcox(dff.price_in_crores))
type(stats.boxcox(dff.price_in_crores))
x,y = stats.boxcox(dff.price_in_crores)
(array([27.95079616, 28.79848153, 27.30470585, 30.38383809, 27.99904977,
30.26757691, 27.42216391, 31.28002149, 17.53617617, 29.52749432,
30.60479773, 33.73538618, 28.40293621, 28.9792627 , 29.40661236,
27.95079616, 28.72320672, 27.80140474, 29.70078348, 27.80140474,
26.40692943, 31.28002149, 32.63292756, 29.8113948 , 17.53617617,
30.60479773, 28.79848153, 30.38383809, 31.76755768, 34.68958496,
28.52685189, 25.12201448, 31.98743902, 32.57353486, 26.85423153,
35.22289001, 30.02199489, 30.26757691, 28.18497291, 33.64823654,
32.57353486, 29.52749432, 29.31289647, 27.95079616, 24.09250186,
26.15573972, 29.40661236, 34.5344799 , 34.24778452, 28.40293621,
29.61529192, 25.97578756, 30.14707386, 31.45043446, 30.81199497,
30.21990256, 19.10014485, 32.35088585, 17.53617617, 30.91096563,
27.6976861 , 28.79848153, 27.95079616, 27.95079616, 28.9792627 ,
27.36399308, 27.6976861 , 27.59039329, 28.36052991, 30.14707386,
29.04881391, 26.15573972, 28.44478198, 28.9792627 , 27.24425686,
27.18259786, 26.98983043, 29.89196429, 25.6837186 , 32.88285199,
27.90178761, 27.11967755, 26.40692943, 28.56710454, 26.71232601,
27.18259786, 30.26757691, 26.78410492, 28.40293621, 35.99229238,
34.43670673, 28.48608236, 29.37567425, 28.09338592, 26.40692943,
35.42808859, 27.24425686, 17.53617617, 26.63881196, 25.97578756,
36.91547391, 25.97578756, 25.97578756, 27.6976861 , 27.95079616,
28.52685189, 27.11967755, 29.11689157, 28.40293621, 27.36399308,
34.14946907, 27.74997671, 29.37567425, 27.24425686, 28.72320672,
33.76965688, 25.57961147, 26.92278282, 26.63881196, 28.72320672,
28.04657215, 36.06113177, 28.68489264, 33.07482362, 35.68838963,
28.60685371, 26.78410492, 27.85199919, 27.95079616, 27.74997671,
27.47926106, 28.68489264, 26.06711555, 26.24182098, 29.89196429,
28.31754728, 28.87202356, 26.15573972, 27.85199919, 27.11967755,
27.11967755, 27.11967755, 30.14707386, 26.78410492, 27.30470585,
29.61529192, 27.95079616, 29.18355899, 27.30470585, 30.14707386,
26.85423153, 28.68489264, 32.12656242, 27.85199919, 30.56176608,
31.45043446, 26.63881196, 26.15573972, 34.64307786, 27.24425686,
25.78429452, 27.6976861 , 26.78410492, 26.40692943, 27.30470585,
27.85199919, 31.00709651, 25.47170593, 24.86327953, 24.26409673,
28.27397184, 25.47170593, 25.78429452, 26.56347349, 26.78410492,
28.22978638, 23.9099021 , 26.32550611, 28.18497291, 25.12201448,
27.95079616, 28.79848153, 25.12201448, 26.06711555, 25.47170593,
25.24326576, 30.81199497, 27.11967755, 28.761066 , 27.11967755,
25.97578756, 28.60685371, 27.11967755, 26.40692943, 28.18497291,
27.24425686, 31.53253095, 25.47170593, 26.06711555, 26.40692943,
25.47170593, 29.15039777, 27.42216391, 28.87202356, 24.72473424,
27.95079616, 22.46030498, 25.6837186 , 26.92278282, 30.02199489,
34.43670673, 27.18259786, 26.56347349, 25.35970284, 27.30470585,
35.52392467, 17.53617617, 25.6837186 , 24.09250186, 26.40692943,
28.18497291, 31.76755768, 33.23707247, 29.40661236, 34.14179368,
34.50681354, 28.79848153, 35.29629949, 27.74997671, 34.64307786,
28.09338592, 32.38900235, 35.7034398 , 25.24326576, 27.6976861 ,
27.0554412 , 27.47926106, 28.18497291, 26.24182098, 27.42216391,
28.79848153, 31.28002149, 30.71000484, 27.11967755, 24.42597887,
29.24887544, 26.40692943, 36.26895868, 34.32892521, 32.45161133,
30.02199489, 27.64450237, 30.49615582, 31.36630055, 29.43725783,
28.9792627 , 26.92278282, 17.53617617, 33.42051256, 29.24887544,
30.02199489, 30.31457295, 30.91096563, 32.12656242, 25.78429452,
37.31086014, 27.18259786, 30.71000484, 32.02977097, 21.28510544,
25.12201448, 28.761066 , 35.73335791, 27.85199919, 34.91979099,
28.18497291, 19.10014485, 29.49769337, 28.18497291, 26.40692943,
24.72473424, 26.78410492, 31.28002149, 28.18497291, 28.60685371,
34.50681354, 29.15039777, 28.761066 , 28.18497291, 27.42216391,
27.95079616, 26.40692943, 26.15573972, 28.60685371, 29.49769337,
26.78410492, 30.02199489, 27.11967755, 35.68335928, 27.30470585,
25.47170593, 28.68489264, 28.31754728, 27.53532484, 32.81644729,
33.10582129, 33.17710299, 25.97578756, 26.92278282, 28.60685371,
25.24326576, 33.07482362, 23.9099021 , 26.40692943, 25.47170593,
26.24182098, 25.97578756, 33.25683744, 30.38383809, 27.36399308,
25.97578756, 32.81644729, 26.24182098, 34.13410184, 25.47170593,
26.48621412, 34.09539327, 24.86327953, 30.14707386, 25.47170593,
32.38900235, 34.14179368, 28.31754728, 26.40692943, 30.38383809,
28.18497291, 34.21028857, 33.27649216, 26.92278282, 27.30470585,
27.24425686, 33.90353605, 25.78429452, 26.40692943, 31.19148065,
27.95079616, 36.30321153, 27.6976861 , 26.78410492, 26.92278282,
27.0554412 , 28.18497291, 35.22289001, 27.47926106, 25.35970284,
27.0554412 , 28.18497291, 25.35970284, 33.96863845, 29.15039777,
26.63881196, 23.5050292 , 25.24326576, 23.71472489, 26.48621412,
24.57922331, 25.24326576, 30.38383809, 23.27837951, 31.06348072,
26.56347349, 25.78429452, 24.09250186, 31.41703123, 25.78429452,
31.26249816, 24.72473424, 25.6837186 , 33.17710299, 30.60479773,
24.99551698, 25.97578756, 25.12201448, 24.86327953, 24.86327953,
27.90178761, 27.42216391, 23.03166067, 24.57922331, 25.97578756,
28.79848153, 27.95079616, 27.11967755, 28.31754728, 29.31289647,
27.85199919, 28.87202356, 27.85199919, 27.42216391, 28.9792627 ,
27.85199919, 29.49769337, 27.95079616, 27.74997671, 27.30470585,
35.53965802, 27.95079616, 28.60685371, 28.60685371, 28.40293621,
27.53532484, 30.60479773, 28.9792627 , 30.96897411, 28.79848153,
26.78410492, 28.36052991, 27.95079616, 27.95079616, 28.48608236]),
0.06860705540578052)
2
tuple
sns.boxplot(x);
# Standardization
ss2 = StandardScaler()
dff4 = ss2.fit_transform(dff.price_in_crores.values.reshape(-1,1))
round(dff4.mean())
dff4.std()
0
1.0
sns.boxplot(dff4);
# Normalization
mm2 = MinMaxScaler()
dff5 = mm2.fit_transform(dff.price_in_crores.values.reshape(-1,1))
dff5.min()
dff5.max()
sns.boxplot(dff5);
dff.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 420 entries, 0 to 419 Data columns (total 8 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 City 420 non-null object 1 Area 420 non-null object 2 Owner 420 non-null object 3 BHK 420 non-null object 4 Floor 420 non-null object 5 price_in_crores 420 non-null int32 6 Carpet_Area 420 non-null float64 7 per_sqt_rupees 420 non-null int32 dtypes: float64(1), int32(2), object(5) memory usage: 23.1+ KB
dff.isna().sum()
City 0 Area 0 Owner 0 BHK 0 Floor 0 price_in_crores 0 Carpet_Area 0 per_sqt_rupees 0 dtype: int64
dff.to_csv('MagicBricks_cleaned.csv')
Data Visiualization
# import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
plt.style.use('bmh')
import warnings
warnings.filterwarnings('ignore')
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'
# for HD visualizations
%config InlineBackend.figure_format='retina'
data = pd.read_csv(r"C:\Users\GUDLA RAGUWING\Data Science Course\MagicBrics_Webscraping_project\MagicBricks_cleaned.csv")
data
| Unnamed: 0 | City | Area | Owner | BHK | Floor | price_in_crores | Carpet_Area | per_sqt_rupees | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | bangalore | Kattigenahalli | Omkar Pandey | 3 | Ground out of 4 | 6000000 | 1000.0 | 4950 |
| 1 | 1 | bangalore | Ayodaya Nagar | gokul | 2 | Ground out of 4 | 8000000 | 1150.0 | 6957 |
| 2 | 2 | bangalore | Electronic City | Thayumanavan | 2 | 4 out of 4 | 4800000 | 1060.0 | 4528 |
| 3 | 3 | bangalore | Tumkur Road | Prasanna | 3 | 14 out of 15 | 13500000 | 1790.0 | 7542 |
| 4 | 4 | bangalore | Kudlu Gate | Akash Akash | 2 | 2 out of 4 | 6100000 | 803.0 | 5706 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 415 | 415 | mysore | Alwar Bypass Road | Nayana A.S | 2 | Ground out of 3 | 4000000 | 1070.0 | 6389 |
| 416 | 416 | mysore | Kuvempunagar | Srikanth | 2 | 1 out of 3 | 6900000 | 1080.0 | 5000 |
| 417 | 417 | mysore | Alwar Bypass Road | MANJUNATHA | 2 | Ground out of 2 | 6000000 | 1200.0 | 4274 |
| 418 | 418 | mysore | V V Mohalla | Chakko | 2 | Ground out of 5 | 6000000 | 900.0 | 6667 |
| 419 | 419 | mysore | Sriramapura | Prasanna | 2 | 1 out of 3 | 7200000 | 1115.0 | 6457 |
420 rows × 9 columns
# Droping unnecessery columns
data.drop(['Unnamed: 0'], axis=1, inplace=True)
# mathematical computation
data.price_in_crores=data.price_in_crores.values/10000000
data.head(15)
| City | Area | Owner | BHK | Floor | price_in_crores | Carpet_Area | per_sqt_rupees | |
|---|---|---|---|---|---|---|---|---|
| 0 | bangalore | Kattigenahalli | Omkar Pandey | 3 | Ground out of 4 | 0.60 | 1000.0 | 4950 |
| 1 | bangalore | Ayodaya Nagar | gokul | 2 | Ground out of 4 | 0.80 | 1150.0 | 6957 |
| 2 | bangalore | Electronic City | Thayumanavan | 2 | 4 out of 4 | 0.48 | 1060.0 | 4528 |
| 3 | bangalore | Tumkur Road | Prasanna | 3 | 14 out of 15 | 1.35 | 1790.0 | 7542 |
| 4 | bangalore | Kudlu Gate | Akash Akash | 2 | 2 out of 4 | 0.61 | 803.0 | 5706 |
| 5 | bangalore | Hal Stage 2 | av nath | 3 | 1 out of 4 | 1.30 | 1640.0 | 7303 |
| 6 | bangalore | Murugeshpalya | Sangeeta Pillai | 2 | 3 out of 5 | 0.50 | 1250.0 | 5000 |
| 7 | bangalore | Begur Road | ganga k | 3 | Basement out of 17 | 1.80 | 1588.0 | 11335 |
| 8 | bangalore | Judicial Layout | Anjan K | 3 | 3 out of 14 | 0.01 | 1000.0 | 8889 |
| 9 | bangalore | Bellandur | Omesh Saraf | 2 | 2 out of 10 | 1.02 | 1465.0 | 6997 |
| 10 | bangalore | Kasturi Nagar | Mohd Hussain | 3 | Ground out of 7 | 1.45 | 1500.0 | 8146 |
| 11 | bangalore | Koramangala Block 1 | Rahul Jain | 3 | 16 out of 16 | 3.85 | 1418.0 | 20632 |
| 12 | bangalore | Sarjapur Road | Gaurav Kumar GAURAV | 3 | Ground out of 5 | 0.70 | 1350.0 | 5655 |
| 13 | bangalore | Devanahalli | Ancy | 2 | 1 out of 7 | 0.85 | 1000.0 | 6071 |
| 14 | bangalore | Hanumantha Nagar | Anoop | 2 | 2 out of 4 | 0.98 | 1250.0 | 9800 |
data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 420 entries, 0 to 419 Data columns (total 8 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 City 420 non-null object 1 Area 420 non-null object 2 Owner 420 non-null object 3 BHK 420 non-null int64 4 Floor 420 non-null object 5 price_in_crores 420 non-null float64 6 Carpet_Area 420 non-null float64 7 per_sqt_rupees 420 non-null int64 dtypes: float64(2), int64(2), object(4) memory usage: 26.4+ KB
data.describe()
| BHK | price_in_crores | Carpet_Area | per_sqt_rupees | |
|---|---|---|---|---|
| count | 420.000000 | 420.000000 | 420.000000 | 420.000000 |
| mean | 2.378571 | 1.221786 | 1156.371429 | 5144.238095 |
| std | 0.754947 | 1.634996 | 530.553297 | 3553.470033 |
| min | 1.000000 | 0.010000 | 9.000000 | 1500.000000 |
| 25% | 2.000000 | 0.377500 | 812.000000 | 3295.000000 |
| 50% | 2.000000 | 0.600000 | 1052.500000 | 4274.000000 |
| 75% | 3.000000 | 1.200000 | 1400.000000 | 5481.250000 |
| max | 5.000000 | 10.900000 | 3900.000000 | 27136.000000 |
%config InlineBackend.figure_format='retina'
# magic command to generate HD charts
plt.figure(figsize=(7,7), dpi = 300)
explode = (0.1,0.1,0.1,0.1,0.1)
plt.pie(x = data.Area.value_counts().values[0:5],
labels = data.Area.value_counts().index[0:5], autopct = '%.2f%%', radius = 0.7, pctdistance=0.5,explode =explode)
plt.title('Percentage of Properties available in top Five Areas')
plt.legend(loc = 'upper right')
plt.show();
plt.figure(figsize=(15,5), dpi=300)
plt.title('Number_of properties for the range of Carpet_Area', fontsize=20, color = 'Black')
plt.xlabel('Carpet_Area',fontsize = 20, color = 'black')
plt.ylabel('count',fontsize = 20, color = 'black')
sns.histplot( x ='Carpet_Area', data = data, color = 'orange');
plt.figure(figsize=(12,5), dpi = 300)
plt.boxplot(data.BHK,patch_artist=True, medianprops=dict(color = 'black'))
plt.title('Distribution of Properties over BHKs', color = 'red',fontsize=20)
plt.xlabel('BHK',fontsize = 20, color = 'black')
plt.ylabel('count',fontsize = 20, color = 'black')
plt.show();
data.BHK = data.BHK.astype(str)
data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 420 entries, 0 to 419 Data columns (total 8 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 City 420 non-null object 1 Area 420 non-null object 2 Owner 420 non-null object 3 BHK 420 non-null object 4 Floor 420 non-null object 5 price_in_crores 420 non-null float64 6 Carpet_Area 420 non-null float64 7 per_sqt_rupees 420 non-null int64 dtypes: float64(2), int64(1), object(5) memory usage: 26.4+ KB
sns.__version__
'0.12.1'
plt.figure(figsize=(12,5), dpi = 300)
plt.title(' Count of BHKs', color = 'Green',fontsize=20)
sns.countplot(x=data.BHK, width = 0.5);
plt.figure(figsize=(12,5), dpi = 300)
plt.title(' Price_in_Crores Vs City', color = 'blue',fontsize=20)
sns.barplot(x=data.price_in_crores, y= data.City, ci= False, estimator='median' );
plt.figure(figsize=(12,5), dpi = 300)
plt.title('Top 12 Areas Vs Price_in_Crores', color = 'blue',fontsize=20)
plt.xlabel('Areas',fontsize = 15, color = 'black')
plt.ylabel('Price_in_Crores',fontsize = 15, color = 'black')
plt.xticks(rotation = 90)
sns.barplot(x=data.Area.value_counts().index[0:12], y= data.price_in_crores.value_counts().index[0:12], ci= False, width = 0.5);
plt.figure(figsize=(12,5), dpi = 300)
plt.scatter(data.Carpet_Area, data.per_sqt_rupees, label = 'per_sqt_rupees')
plt.title('Scatter plot - Carpet_Area vs per_sqt_rupees', fontsize=20)
plt.xlabel('Carpet_Area',fontsize = 15, color = 'black')
plt.ylabel('per_sqt_rupees',fontsize = 15, color = 'black')
plt.legend()
plt.show();
plt.figure(figsize=(12,5), dpi = 300)
plt.scatter(data.Carpet_Area, data.price_in_crores, label = 'price')
plt.title('Scatter plot - Carpet_Area vs price_in_crores', fontsize=20)
plt.xlabel('Carpet_Area',fontsize = 15, color = 'black')
plt.ylabel('price_in_crores',fontsize = 15, color = 'black')
plt.legend()
plt.show();
# plot corsstab data
pd.crosstab(index=data.City, columns=data.BHK)
| BHK | 1 | 2 | 3 | 4 | 5 |
|---|---|---|---|---|---|
| City | |||||
| agra | 0 | 16 | 12 | 0 | 2 |
| bangalore | 0 | 12 | 17 | 1 | 0 |
| bhiwadi | 5 | 22 | 3 | 0 | 0 |
| chennai | 4 | 22 | 4 | 0 | 0 |
| greater-noida | 1 | 15 | 10 | 4 | 0 |
| gurgaon | 1 | 7 | 16 | 6 | 0 |
| haridwar | 8 | 13 | 8 | 1 | 0 |
| mangalore | 1 | 18 | 10 | 1 | 0 |
| mumbai | 11 | 14 | 4 | 1 | 0 |
| mysore | 1 | 17 | 9 | 3 | 0 |
| raipur | 2 | 15 | 13 | 0 | 0 |
| ranchi | 1 | 9 | 18 | 1 | 1 |
| vadodara | 4 | 14 | 9 | 3 | 0 |
| visakhapatnam | 1 | 14 | 15 | 0 | 0 |
i=pd.crosstab(index=data.City, columns=data.BHK)
plt.figure(figsize=(15,7), dpi=300)
plt.title('HeatMap - BHK vs City', fontsize=20)
sns.heatmap(i, annot=True, fmt = '.2f', linewidths=0.5, cmap='cool', linecolor='black');
plt.xlabel('BHK',fontsize = 15, color = 'black')
plt.ylabel('City',fontsize = 15, color = 'black');
# fmt=d for integers; fmt = .2f for float with 2 decimals
data.BHK=data.BHK.astype(int)
data_cor = data.corr()
plt.figure(figsize=(16,10), dpi=300)
plt.title('Correlation between Numerical columns', fontsize=20)
sns.heatmap(data_cor, annot=True, fmt = '.2f', linewidths=0.5, cmap='cool', linecolor='black');
# fmt=d for integers; fmt = .2f for float with 2 decimals
%%time
# the above command is a magic command to display wall time in seconds
plt.figure(figsize=(16,8), dpi= 300)
sns.pairplot(data)
plt.show();
<Figure size 4800x2400 with 0 Axes>
CPU times: total: 4.8 s Wall time: 5.69 s
hue parameter¶plt.figure(figsize=(15,5), dpi = 300)
sns.relplot(data = data, x = 'price_in_crores', y= 'City',hue = 'BHK')
plt.show();
<Figure size 4500x1500 with 0 Axes>
plt.figure(figsize=(15,5), dpi = 300)
sns.catplot(x="price_in_crores", y="City", kind="swarm",hue = 'BHK',color = 'red', data=data);
<Figure size 4500x1500 with 0 Axes>
plt.figure(figsize=(15,8), dpi = 300)
sns.catplot(x="price_in_crores",y="City",kind='violin',data=data);
<Figure size 4500x2400 with 0 Axes>
data.tail()
| City | Area | Owner | BHK | Floor | price_in_crores | Carpet_Area | per_sqt_rupees | |
|---|---|---|---|---|---|---|---|---|
| 415 | mysore | Alwar Bypass Road | Nayana A.S | 2 | Ground out of 3 | 0.40 | 1070.0 | 6389 |
| 416 | mysore | Kuvempunagar | Srikanth | 2 | 1 out of 3 | 0.69 | 1080.0 | 5000 |
| 417 | mysore | Alwar Bypass Road | MANJUNATHA | 2 | Ground out of 2 | 0.60 | 1200.0 | 4274 |
| 418 | mysore | V V Mohalla | Chakko | 2 | Ground out of 5 | 0.60 | 900.0 | 6667 |
| 419 | mysore | Sriramapura | Prasanna | 2 | 1 out of 3 | 0.72 | 1115.0 | 6457 |
def user_data_frame():
Data = input('enter price_in_croers or BHK or City')
if Data=='City':
x = input('enter the City name')
if(x=='bangalore'):
return(data[data.City=='bangalore'])
elif(x=='mysore'):
return(data[data.City=='mysore'])
elif Data=='BHK':
x = int(input('enter the BHK Number'))
if(x==2):
return(data[data.BHK==2])
elif(x==3):
return(data[data.BHK==3])
elif Data =='price_in_crores':
x = float(input('enter the price'))
if(x>0.9):
return(data[data.price_in_crores>0.9])
user_data_frame()
enter price_in_croers or BHK or CityBHK enter the BHK Number2
| City | Area | Owner | BHK | Floor | price_in_crores | Carpet_Area | per_sqt_rupees | |
|---|---|---|---|---|---|---|---|---|
| 1 | bangalore | Ayodaya Nagar | gokul | 2 | Ground out of 4 | 0.80 | 1150.0 | 6957 |
| 2 | bangalore | Electronic City | Thayumanavan | 2 | 4 out of 4 | 0.48 | 1060.0 | 4528 |
| 4 | bangalore | Kudlu Gate | Akash Akash | 2 | 2 out of 4 | 0.61 | 803.0 | 5706 |
| 6 | bangalore | Murugeshpalya | Sangeeta Pillai | 2 | 3 out of 5 | 0.50 | 1250.0 | 5000 |
| 9 | bangalore | Bellandur | Omesh Saraf | 2 | 2 out of 10 | 1.02 | 1465.0 | 6997 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 415 | mysore | Alwar Bypass Road | Nayana A.S | 2 | Ground out of 3 | 0.40 | 1070.0 | 6389 |
| 416 | mysore | Kuvempunagar | Srikanth | 2 | 1 out of 3 | 0.69 | 1080.0 | 5000 |
| 417 | mysore | Alwar Bypass Road | MANJUNATHA | 2 | Ground out of 2 | 0.60 | 1200.0 | 4274 |
| 418 | mysore | V V Mohalla | Chakko | 2 | Ground out of 5 | 0.60 | 900.0 | 6667 |
| 419 | mysore | Sriramapura | Prasanna | 2 | 1 out of 3 | 0.72 | 1115.0 | 6457 |
208 rows × 8 columns